1 /*   sqnutil4.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  sqnutil4.c
27 *
28 * Author:  Colleen Bollin
29 *
30 * Version Creation Date:   12/27/2007
31 *
32 * $Revision: 1.259 $
33 *
34 * File Description:
35 * This file contains functions for automatically generating definition lines.
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date     Name        Description of modification
40 * -------  ----------  -----------------------------------------------------
41 *
42 *
43 * ==========================================================================
44 */
45 #include <sqnutils.h>
46 #include <ncbilang.h>
47 #include <objfdef.h>
48 #include <gather.h>
49 #include <explore.h>
50 #include <edutil.h>
51 #include <salutil.h>
52 #include <tofasta.h>
53 #include <gbftdef.h>
54 #include <gbfeat.h>
55 #include <findrepl.h>
56 #include <salpacc.h>
57 #include <salpedit.h>
58 #include <alignmgr.h>
59 #include <alignmgr2.h>
60 #include <explore.h>
61 #define NLM_GENERATED_CODE_PROTO
62 #include <objmacro.h>
63 #include <macroapi.h>
64 #include <tax3api.h>
65 
/* This is a list of the source modifiers that are of interest when
 * building definition lines.
 * Each entry gives, in order: a name string, a flag that is TRUE when the
 * modifier is an OrgMod and FALSE when it is a SubSource (read elsewhere as
 * the isOrgMod field), and the subtype constant (the subtype field).
 * Entries are kept in alphabetical order by name.
 */
/* Note that if you modify the DefLineModifiers array, */
/* you should make the corresponding change to the DefLinePos enum. */

ModifierItemGlobalData DefLineModifiers[] = {
  { "Bio-material"         , TRUE,  ORGMOD_bio_material         },
  { "Biotype"              , TRUE , ORGMOD_biotype              },
  { "Biovar"               , TRUE , ORGMOD_biovar               },
  { "Breed"                , TRUE , ORGMOD_breed                },
  { "Cell-line"            , FALSE, SUBSRC_cell_line            },
  { "Chemovar"             , TRUE , ORGMOD_chemovar             },
  { "Chromosome"           , FALSE, SUBSRC_chromosome           },
  { "Clone"                , FALSE, SUBSRC_clone                },
  { "Country"              , FALSE, SUBSRC_country              },
  { "Cultivar"             , TRUE , ORGMOD_cultivar             },
  { "Culture-collection"   , TRUE , ORGMOD_culture_collection   },
  { "Dev-stage"            , FALSE, SUBSRC_dev_stage            },
  { "Ecotype"              , TRUE , ORGMOD_ecotype              },
  { "Endogenous-virus-name", FALSE, SUBSRC_endogenous_virus_name},
  { "Genotype"             , FALSE, SUBSRC_genotype             },
  { "Haplogroup"           , FALSE, SUBSRC_haplogroup           },
  { "Haplotype"            , FALSE, SUBSRC_haplotype            },
  { "Isolate"              , TRUE , ORGMOD_isolate              },
  { "Linkage-group"        , FALSE, SUBSRC_linkage_group        },
  { "Map"                  , FALSE, SUBSRC_map                  },
  { "Pathovar"             , TRUE , ORGMOD_pathovar             },
  { "Plasmid-name"         , FALSE, SUBSRC_plasmid_name         },
  { "Pop-variant"          , FALSE, SUBSRC_pop_variant          },
  { "Segment"              , FALSE, SUBSRC_segment              },
  { "Serogroup"            , TRUE , ORGMOD_serogroup            },
  { "Serotype"             , TRUE , ORGMOD_serotype             },
  { "Serovar"              , TRUE , ORGMOD_serovar              },
  { "Specimen voucher"     , TRUE , ORGMOD_specimen_voucher     },
  { "Strain"               , TRUE , ORGMOD_strain               },
  { "Subclone"             , FALSE, SUBSRC_subclone             },
  { "Substrain"            , TRUE , ORGMOD_substrain            },
  { "Transgenic"           , FALSE, SUBSRC_transgenic           }
};
104 
105 #define numDefLineModifiers (sizeof (DefLineModifiers) / sizeof (ModifierItemGlobalData))
106 
NumDefLineModifiers(void)107 NLM_EXTERN size_t NumDefLineModifiers (void)
108 
109 {
110   return numDefLineModifiers;
111 }
112 
MergeValNodeStrings(ValNodePtr list,Boolean useReturn)113 NLM_EXTERN CharPtr MergeValNodeStrings (ValNodePtr list, Boolean useReturn)
114 
115 {
116   size_t      len;
117   CharPtr     ptr;
118   CharPtr     str;
119   CharPtr     tmp;
120   ValNodePtr  vnp;
121 
122 
123   ptr = NULL;
124   if (list != NULL) {
125     vnp = list;
126     len = 0;
127     while (vnp != NULL) {
128       if (vnp->data.ptrvalue != NULL) {
129         len += StringLen ((CharPtr) vnp->data.ptrvalue) + 1;
130       }
131       vnp = vnp->next;
132     }
133     if (len > 0) {
134       ptr = MemNew (sizeof (Char) * (len + 2));
135       if (ptr != NULL) {
136         vnp = list;
137         tmp = NULL;
138         while (vnp != NULL) {
139           str = (CharPtr) vnp->data.ptrvalue;
140           if (str != NULL) {
141             if (tmp == NULL) {
142               tmp = ptr;
143             } else if (useReturn) {
144               tmp = StringMove (tmp, "\n");
145             } else if (IsJapanese () && (tmp - ptr > 2) &&
146                     IsMBLetter (tmp - 2) && IsMBLetter (str)) {
147               /* no space required between two Japanese letters. */
148               tmp = tmp;
149             } else if (str [0] != ',' && str [0] != ';' && str [0] != ':') {
150               tmp = StringMove (tmp, " ");
151             } else {
152               tmp = StringMove (tmp, " ");
153             }
154             tmp = StringMove (tmp, str);
155           }
156           vnp = vnp->next;
157         }
158       }
159     }
160   }
161   return ptr;
162 }
163 
164 
165 /* The matchFunction functions are used to identify features that meet
166  * specific requirements, usually that the feature is of a particular type.
167  * This function is used instead of simply using the subtype for the feature
168  * because some features are identified based on the contents or presence of
169  * certain modifiers.
170  * Functions of this type should always return FALSE if handed a NULL argument.
171  */
172 typedef Boolean (LIBCALLBACK *matchFunction) (
173   SeqFeatPtr sfp
174 );
175 
176 static void ListClauses (
177   ValNodePtr clauselist,
178   ValNodePtr PNTR strings,
179   Boolean    allow_semicolons,
180   Boolean    suppress_final_and,
181   Boolean    suppress_allele
182 );
183 
184 static void LabelClauses
185 ( ValNodePtr clause_list,
186   Uint1      biomol,
187   BioseqPtr  bsp,
188   DeflineFeatureRequestListPtr rp);
189 
190 static CharPtr GetProductName
191 ( SeqFeatPtr cds,
192   BioseqPtr  bsp,
193   DeflineFeatureRequestListPtr rp);
194 
195 #define DEFLINE_FEATLIST    1
196 #define DEFLINE_CLAUSEPLUS  2
197 #define DEFLINE_REMOVEFEAT  3
198 
/* FeatureLabelData groups the pieces from which a feature clause's label
 * is built.
 * NOTE(review): none of these fields is read or written in this section of
 * the file; the per-field notes are inferred from the names only and should
 * be confirmed against the code that fills the structure in.
 */
typedef struct featurelabeldata {
  Boolean pluralizable;       /* presumably: the label may be made plural */
  Boolean is_typeword_first;  /* presumably: typeword precedes the description */
  CharPtr typeword;           /* presumably: the feature-type word */
  CharPtr description;        /* presumably: descriptive text for the feature */
  CharPtr productname;        /* presumably: product name, when there is one */
} FeatureLabelData, PNTR FeatureLabelPtr;
206 
207 
/* FeatureClauseData describes one clause of a definition line.  It embeds
 * a FeatureLabelData and carries a list (featlist), a location (slp), a
 * gene reference (grp) and a set of state flags.
 * NOTE(review): the fields are not used in this section of the file, so
 * their exact semantics and ownership should be confirmed against the
 * functions that create and free clauses.
 */
typedef struct featureclause {
  ValNodePtr       featlist;
  FeatureLabelData feature_label_data;
  CharPtr          allelename;
  CharPtr          interval;
  Boolean          is_alt_spliced;
  Boolean          has_mrna;
  SeqLocPtr        slp;
  GeneRefPtr       grp;
  Boolean          clause_info_only;
  Boolean          is_unknown;
  Boolean          make_plural;
  Boolean          delete_me;
  /* this information used only for segments */
  Int2             numivals;
  Int4Ptr          ivals;
} FeatureClauseData, PNTR FeatureClausePtr;
225 
226 FeatureClausePtr NewFeatureClause (
227   SeqFeatPtr sfp,
228   BioseqPtr bsp,
229   DeflineFeatureRequestListPtr rp);
230 
231 static void PluralizeConsolidatedClauseDescription (
232   FeatureClausePtr fcp
233 );
234 
235 typedef Boolean (LIBCALLBACK *ShouldRemoveFunction) (
236   SeqFeatPtr sfp,
237   FeatureClausePtr parent_fcp,
238   FeatureClausePtr this_fcp,
239   BioseqPtr        bsp,
240   Boolean          isLonely,
241   Boolean          isRequested,
242   Boolean          isSegment,
243   DeflineFeatureRequestListPtr rp
244 );
245 
246 /* This section of the code contains some functions for dealing with
247  * linked lists of strings */
248 
249 /* This function finds the first occurrence of "search" in one of the
250  * strings in list "strings".
251  * "search" could be part of the string or could be the entire string.
252  */
FindStringInStrings(ValNodePtr strings,CharPtr search)253 static ValNodePtr FindStringInStrings (
254   ValNodePtr strings,
255   CharPtr search
256 )
257 {
258   while (strings != NULL)
259   {
260     if (DoesStringContainPhrase (strings->data.ptrvalue, search, TRUE, TRUE))
261     {
262       return strings;
263     }
264     strings = strings->next;
265   }
266   return NULL;
267 }
268 
269 //LCOV_EXCL_START
270 //Not part of Autodef or Cleanup
FindExactStringListMatch(ValNodePtr list,CharPtr value)271 NLM_EXTERN ValNodePtr FindExactStringListMatch (
272   ValNodePtr list,
273   CharPtr value
274 )
275 
276 {
277   CharPtr     str;
278   ValNodePtr  vnp;
279 
280   for (vnp = list; vnp != NULL; vnp = vnp->next) {
281     str = (CharPtr) vnp->data.ptrvalue;
282     if (StringCmp (str, value) == 0) return vnp;
283   }
284 
285   return NULL;
286 }
287 
288 //Not part of Autodef or Cleanup
289 /* This function creates a new linked list of strings with copies of
290  * contents of orig.
291  */
CopyStrings(ValNodePtr orig)292 static ValNodePtr CopyStrings (
293   ValNodePtr orig
294 )
295 {
296   ValNodePtr new_string_start = NULL;
297 
298   while (orig != NULL)
299   {
300     ValNodeAddStr (&new_string_start, 0,
301       StringSave (orig->data.ptrvalue));
302     orig = orig->next;
303   }
304   return new_string_start;
305 }
306 //LCOV_EXCL_STOP
307 
308 /*
309  * This section of the code contains functions and structures for obtaining a
310  * description of the organism in the record, including functions for finding
311  * the combination of modifiers that will make each organism description
312  * unique.
313  * The method used for determining the best combination of modifiers involves
314  * creating a list of required modifiers, and then creating a list of
315  * combinations of modifiers by adding modifiers one at a time
316  * to see if the additional modifiers provide any more differentiation in
317  * the list.
318  * In order to do this, I start with a list of required modifiers, and
319  * then create copies of this list.  For each copy I add one of the modifiers
320  * that are present in the bio sources and not already on the list.
321  * If adding the modifier increases the differentiation, I add that copy to
322  * the list of possible combinations, otherwise I discard it.
323  * I then make copies of all of the new items I added to the list and
324  * add another modifier to each list, keeping the combinations that increase
325  * the differentiation and discarding the rest.
326  * This process continues until I have a combination that produces completely
327  * differentiated bio sources, or I run out of possible combinations.
328  * If I run out of possible combinations, I select the best combination from
329  * the list.
330  * This search process occurs in FindBestCombo.  The majority of the functions
331  * in this section are here to support FindBestCombo, specifically to create,
332  * copy, and grow lists of combinations.
333  */
334 
/* BioSrcDescData is used to calculate the best possible combination of
 * source and organism modifiers for uniqueness.
 * biop contains the BioSourcePtr from a sequence in the record.
 * strings contains a list of string representations of the modifiers
 * for this combination for this organism.
 * next links to the following BioSrcDescData in the list; it is declared
 * as a generic Pointer.
 */
typedef struct biosrcdescdata {
  BioSourcePtr  biop;     /* copied by pointer, not duplicated, by CopyBioSrcDescPtr */
  ValNodePtr    strings;  /* released with ValNodeFreeData by FreeBioSrcDescPtr */
  Pointer       next;     /* next BioSrcDescData, or NULL at the end of the list */
} BioSrcDescData, PNTR BioSrcDescPtr;
346 
/* OrgGroupData is used to calculate the best possible combination of
 * source and organism modifiers for uniqueness.
 * org_list is a list of all organisms that have identical descriptions
 * using the current set of modifiers.
 * num_organisms contains the number of organisms with identical descriptions.
 * next links to the following OrgGroupData in the list; it is declared as
 * a generic Pointer.
 */
typedef struct orggroupdata {
  BioSrcDescPtr org_list;       /* released with FreeBioSrcDescPtr by FreeOrgGroupPtr */
  Int4          num_organisms;  /* number of entries in org_list */
  Pointer       next;           /* next OrgGroupData, or NULL at the end of the list */
} OrgGroupData, PNTR OrgGroupPtr;
358 
/* ModifierCombinationData is used to calculate the best possible combination
 * of source and organism modifiers for uniqueness.
 * num_groups is the number of groups of organisms with identical descriptions
 *           using the modifiers specified in modifier_indices.
 * num_mods is the number of modifiers specified in modifier_indices.
 * max_orgs_in_group is the maximum number of organisms in any one group.
 * num_unique_orgs is the number of organisms that are alone in their groups
 *           i.e., their description is unique.
 * modifier_indices is the list of modifier indices for this combination.
 * group_list is the list of groups of organisms with identical descriptions
 *           using the modifiers specified in modifier_indices.
 * next links to the following combination in the list; it is declared as a
 *           generic Pointer.
 */
typedef struct modifiercombination {
  Int4         num_groups;
  Int4         num_mods;
  Int4         max_orgs_in_group;
  Int4         num_unique_orgs;
  ValNodePtr   modifier_indices;
  OrgGroupPtr  group_list;
  Pointer      next;
} ModifierCombinationData, PNTR ModifierCombinationPtr;
380 
/* Reports whether a modifier is required by default.
 *
 * is_orgmod - TRUE when the modifier is an OrgMod, FALSE for a SubSource.
 * index     - compared against SUBSRC_* subtype constants, so despite its
 *             name it carries a subtype value.
 *
 * Returns TRUE only for the SubSource subtypes endogenous-virus-name,
 * plasmid-name and transgenic; every OrgMod and every other SubSource
 * yields FALSE.
 */
static Boolean IsDeflineModifierRequiredByDefault (Boolean is_orgmod, Int2 index)
{
  if (is_orgmod) {
    return FALSE;
  }
  if (index == SUBSRC_endogenous_virus_name) {
    return TRUE;
  }
  if (index == SUBSRC_plasmid_name) {
    return TRUE;
  }
  if (index == SUBSRC_transgenic) {
    return TRUE;
  }
  return FALSE;
}
392 
393 //LCOV_EXCL_START
394 //Not part of Autodef or Cleanup
/* Appends to *sq_list a copy of the qualifier description "orig" that
 * stands for one sub-field of that qualifier.
 *
 * sq_list  - address of the list head to append to; nothing is done when NULL.
 * orig     - description to clone; nothing is done when NULL.
 * str      - name stored in the copy.  The pointer itself is stored (the
 *            text is not duplicated), so it must outlive the list.
 * subfield - sub-field number stored in the copy.
 *
 * If the copy cannot be allocated the list is left unchanged.
 */
static void AddOneSubtypeField (ValNodePtr PNTR sq_list, SourceQualDescPtr orig, CharPtr str, Uint1 subfield)
{
  SourceQualDescPtr sqdp_cpy;

  if (sq_list == NULL || orig == NULL) {
    return;
  }
  sqdp_cpy = (SourceQualDescPtr) MemNew (sizeof (SourceQualDescData));
  if (sqdp_cpy == NULL) {
    /* allocation failed: do not copy through a NULL pointer */
    return;
  }
  MemCpy (sqdp_cpy, orig, sizeof (SourceQualDescData));

  /* the copy differs from the original only in its name and sub-field */
  sqdp_cpy->name = str;
  sqdp_cpy->subfield = subfield;

  ValNodeAddPointer (sq_list, 0, sqdp_cpy);
}
410 
411 
412 //Not part of Autodef or Cleanup
/* Appends the three sub-field entries (INST, COLL, SpecID, numbered 1 to 3)
 * for an OrgMod qualifier that has them: specimen-voucher,
 * culture-collection or bio-material.  Any other qualifier, any SubSource
 * qualifier, or a NULL argument leaves the list untouched.
 */
static void AddSubtypeFields (ValNodePtr PNTR sq_list, SourceQualDescPtr orig)
{
  if (sq_list == NULL || orig == NULL) {
    return;
  }
  if (!orig->isOrgMod) {
    return;
  }

  if (orig->subtype == ORGMOD_specimen_voucher) {
    AddOneSubtypeField (sq_list, orig, "specimen-voucher INST", 1);
    AddOneSubtypeField (sq_list, orig, "specimen-voucher COLL", 2);
    AddOneSubtypeField (sq_list, orig, "specimen-voucher SpecID", 3);
  } else if (orig->subtype == ORGMOD_culture_collection) {
    AddOneSubtypeField (sq_list, orig, "culture-collection INST", 1);
    AddOneSubtypeField (sq_list, orig, "culture-collection COLL", 2);
    AddOneSubtypeField (sq_list, orig, "culture-collection SpecID", 3);
  } else if (orig->subtype == ORGMOD_bio_material) {
    AddOneSubtypeField (sq_list, orig, "bio-material INST", 1);
    AddOneSubtypeField (sq_list, orig, "bio-material COLL", 2);
    AddOneSubtypeField (sq_list, orig, "bio-material SpecID", 3);
  }
}
437 
438 
439 //Not part of Autodef or Cleanup
/* Appends one SourceQualDescData entry to *list for every named entry of
 * a qualifier name/value table.
 *
 * list                    - address of the list head to append to.
 * qual_list               - table terminated by an entry whose name is
 *                           NULL; entries whose name has no text are skipped.
 * is_orgmod               - TRUE when the table holds OrgMod subtypes,
 *                           FALSE for SubSource subtypes.
 * use_alternate_note_name - when TRUE, the "other" subtype of the table is
 *                           listed as "Note -- OrgMod" or "Note -- SubSource"
 *                           instead of under its table name.
 * get_subfields           - when TRUE, AddSubtypeFields is called for each
 *                           entry added.
 *
 * Entry names point at the table's strings or at string literals; they are
 * not duplicated.  Nothing is done when list or qual_list is NULL, and an
 * entry whose allocation fails is skipped.
 */
static void AddQualList (ValNodePtr PNTR list, Nlm_QualNameAssocPtr qual_list, Boolean is_orgmod, Boolean use_alternate_note_name, Boolean get_subfields)
{
  Int4              k;
  SourceQualDescPtr sqdp;
  Boolean           is_note;

  /* With no list head the new entries could not be linked anywhere, and a
   * NULL table cannot be walked; AddNoteQual guards its list the same way.
   */
  if (list == NULL || qual_list == NULL) {
    return;
  }

  for (k = 0; qual_list[k].name != NULL; k++) {
    if (StringHasNoText (qual_list[k].name)) {
      continue;
    }
    sqdp = (SourceQualDescPtr) MemNew (sizeof (SourceQualDescData));
    if (sqdp == NULL) {
      continue;
    }

    is_note = (Boolean) ((is_orgmod && qual_list[k].value == ORGMOD_other)
                         || (!is_orgmod && qual_list[k].value == SUBSRC_other));
    if (use_alternate_note_name && is_note) {
      if (is_orgmod) {
        sqdp->name = "Note -- OrgMod";
      } else {
        sqdp->name = "Note -- SubSource";
      }
    } else {
      sqdp->name = qual_list[k].name;
    }
    sqdp->isOrgMod = is_orgmod;
    sqdp->subtype = qual_list[k].value;
    sqdp->subfield = 0;
    ValNodeAddPointer (list, 0, sqdp);

    if (get_subfields) {
      AddSubtypeFields (list, sqdp);
    }
  }
}
474 
475 //Not part of Autodef or Cleanup
/* Appends a "note" qualifier entry to *list.
 *
 * list                    - address of the list head; nothing is done when NULL.
 * is_orgmod               - TRUE gives the entry subtype ORGMOD_other,
 *                           FALSE gives SUBSRC_other.
 * use_alternate_note_name - when TRUE the entry is named "Note -- OrgMod"
 *                           or "Note -- SubSource"; otherwise just "Note".
 *
 * The list is left unchanged when the entry cannot be allocated.
 */
static void AddNoteQual (ValNodePtr PNTR list, Boolean is_orgmod, Boolean use_alternate_note_name)
{
  SourceQualDescPtr note;

  if (list == NULL) {
    return;
  }
  note = (SourceQualDescPtr) MemNew (sizeof (SourceQualDescData));
  if (note == NULL) {
    return;
  }

  if (!use_alternate_note_name) {
    note->name = "Note";
  } else if (is_orgmod) {
    note->name = "Note -- OrgMod";
  } else {
    note->name = "Note -- SubSource";
  }
  note->isOrgMod = is_orgmod;
  note->subtype = is_orgmod ? ORGMOD_other : SUBSRC_other;
  note->subfield = 0;
  ValNodeAddPointer (list, 0, note);
}
505 
506 
507 //Not part of Autodef or Cleanup
SortVnpBySourceQualDesc(VoidPtr ptr1,VoidPtr ptr2)508 NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2)
509 
510 {
511   SourceQualDescPtr     str1;
512   SourceQualDescPtr     str2;
513   ValNodePtr  vnp1;
514   ValNodePtr  vnp2;
515 
516   if (ptr1 != NULL && ptr2 != NULL) {
517     vnp1 = *((ValNodePtr PNTR) ptr1);
518     vnp2 = *((ValNodePtr PNTR) ptr2);
519     if (vnp1 != NULL && vnp2 != NULL) {
520       str1 = (SourceQualDescPtr) vnp1->data.ptrvalue;
521       str2 = (SourceQualDescPtr) vnp2->data.ptrvalue;
522       if (str1 != NULL && str2 != NULL
523           && str1->name != NULL && str2->name != NULL) {
524         return StringICmp (str1->name, str2->name);
525       }
526     }
527   }
528   return 0;
529 }
530 
531 
532 //Not part of Autodef or Cleanup
GetSourceQualDescListEx(Boolean get_subsrc,Boolean get_orgmod,Boolean get_discouraged,Boolean get_discontinued,Boolean get_subfields)533 extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields)
534 {
535   ValNodePtr        source_qual_list = NULL;
536 
537   if (get_orgmod) {
538     AddQualList (&source_qual_list, current_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
539     if (get_discouraged) {
540       AddQualList (&source_qual_list, discouraged_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
541     }
542     if (get_discontinued) {
543       AddQualList (&source_qual_list, discontinued_orgmod_subtype_alist, TRUE, get_subsrc, get_subfields);
544     }
545     AddNoteQual (&source_qual_list, TRUE, get_subsrc);
546   }
547   if (get_subsrc) {
548     AddQualList (&source_qual_list, current_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
549     if (get_discouraged) {
550       AddQualList (&source_qual_list, discouraged_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
551     }
552     if (get_discontinued) {
553       AddQualList (&source_qual_list, discontinued_subsource_subtype_alist, FALSE, get_orgmod, get_subfields);
554     }
555     AddNoteQual (&source_qual_list, FALSE, get_orgmod);
556   }
557 
558   source_qual_list = ValNodeSort (source_qual_list, SortVnpBySourceQualDesc);
559   return source_qual_list;
560 }
561 
562 //Not part of Autodef or Cleanup
GetSourceQualDescList(Boolean get_subsrc,Boolean get_orgmod,Boolean get_discouraged,Boolean get_discontinued)563 extern ValNodePtr GetSourceQualDescList (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued)
564 {
565   return GetSourceQualDescListEx (get_subsrc, get_orgmod, get_discouraged, get_discontinued, TRUE);
566 }
567 
568 //Not part of Autodef or Cleanup
569 /*
570  * The CountModifiersProc is used as the callback function for
571  * VisitBioSourcesInSep when we are getting a list of all the modifiers
572  * that appear in the sources.  We also obtain, for each modifier class,
573  * the first value seen, whether or not each value seen is unique for
574  * for the modifier, and whether or not the modifier is present for all
575  * sources.
576  */
CountModifiersProc(BioSourcePtr biop,Pointer userdata)577 static void CountModifiersProc (
578   BioSourcePtr biop,
579   Pointer userdata
580 )
581 {
582   ModifierItemLocalPtr ItemList;
583   OrgModPtr     mod;
584   SubSourcePtr  ssp;
585   Int2 i;
586   Boolean       found_this_modifier;
587 
588   if (biop == NULL) return;
589   ItemList = (ModifierItemLocalPtr) userdata;
590 
591   for (i=0; i < numDefLineModifiers; i++)
592   {
593     found_this_modifier = FALSE;
594     if (DefLineModifiers[i].isOrgMod)
595     {
596       if ( biop->org != NULL && biop->org->orgname != NULL)
597       {
598         mod = biop->org->orgname->mod;
599         while (mod != NULL
600           && mod->subtype != DefLineModifiers[i].subtype)
601         {
602           mod = mod->next;
603         }
604         if (mod != NULL && mod->subname != NULL)
605         {
606           found_this_modifier = TRUE;
607           if (ItemList[i].first_value_seen != NULL)
608           {
609             if (StringCmp (ItemList[i].first_value_seen, mod->subname) != 0)
610             {
611               ItemList[i].is_unique = FALSE;
612             }
613           }
614           else
615           {
616             ItemList[i].first_value_seen = mod->subname;
617           }
618           if ( FindExactStringListMatch (ItemList[i].values_seen, mod->subname)
619             == NULL)
620           {
621             ValNodeAddStr (&ItemList[i].values_seen, 0, mod->subname);
622           }
623           else
624           {
625             ItemList[i].all_unique = FALSE;
626           }
627         }
628       }
629     } else {
630       ssp = biop->subtype;
631       while (ssp != NULL && ssp->subtype != DefLineModifiers[i].subtype)
632       {
633         ssp = ssp->next;
634       }
635       if (ssp != NULL && ssp->name != NULL)
636       {
637         found_this_modifier = TRUE;
638         if (ItemList[i].first_value_seen != NULL)
639         {
640           if (StringCmp (ItemList[i].first_value_seen, ssp->name) != 0)
641           {
642             ItemList[i].is_unique = FALSE;
643           }
644         }
645         else
646         {
647           ItemList[i].first_value_seen = ssp->name;
648         }
649         if ( FindExactStringListMatch (ItemList[i].values_seen, ssp->name)
650           == NULL)
651         {
652           ValNodeAddStr (&ItemList[i].values_seen, 0, ssp->name);
653         }
654         else
655         {
656           ItemList[i].all_unique = FALSE;
657         }
658       }
659     }
660     if (found_this_modifier)
661     {
662       ItemList[i].any_present = TRUE;
663     } else {
664       ItemList[i].all_present = FALSE;
665     }
666   }
667 }
668 
669 //Not part of Autodef or Cleanup
670 /* The CountModifiers function visits all of the bio sources, determining
671  * which modifiers are present, which modifiers have only one value,
672  * which modifiers have all different values, and which modifiers are
673  * present in all sources.
674  * After this survey is complete, the function prepares a short summary
675  * of the above information for each modifier, which is used in the
676  * definition line options dialog.
677  */
CountModifiers(ModifierItemLocalPtr ItemList,SeqEntryPtr sep)678 NLM_EXTERN void CountModifiers (
679   ModifierItemLocalPtr ItemList,
680   SeqEntryPtr sep
681 )
682 {
683   Int2 i;
684 
685   for (i=0; i < numDefLineModifiers; i++)
686   {
687     ItemList[i].all_present = TRUE;
688     ItemList[i].is_unique = TRUE;
689     ItemList[i].first_value_seen = NULL;
690     ItemList[i].values_seen = NULL;
691     ItemList[i].all_unique = TRUE;
692   }
693 
694   VisitBioSourcesInSep (sep, ItemList, CountModifiersProc);
695 
696   for (i=0; i < numDefLineModifiers; i++)
697   {
698     if (ItemList[i].all_present && ItemList[i].all_unique)
699     {
700       ItemList[i].status = "All present, all unique";
701     }
702     else if (ItemList[i].all_present && ItemList[i].is_unique)
703     {
704       ItemList[i].status = "All present, one unique";
705     }
706     else if (ItemList[i].all_present && ! ItemList[i].is_unique)
707     {
708       ItemList[i].status = "All present, mixed";
709     }
710     else if (! ItemList[i].all_present && ItemList[i].all_unique)
711     {
712       ItemList[i].status = "Some missing, all unique";
713     }
714     else if (! ItemList[i].all_present && ItemList[i].is_unique)
715     {
716       ItemList[i].status = "Some missing, one unique";
717     }
718     else if (! ItemList[i].all_present && ! ItemList[i].is_unique)
719     {
720       ItemList[i].status = "Some missing, mixed";
721     }
722   }
723 }
724 
725 //Not part of Autodef or Cleanup
726 /* The BioSrcDescData structure is used to hold a BioSourcePtr, a list
727  * of strings used to describe the biosource, including the taxonomy name
728  * and the values of all of the modifiers selected so far for this bio
729  * source, and a pointer to the next BioSrcDescData structure in the list.
730  */
731 
732 /* The CopyBioSrcDescPtr function creates a copy of the linked list of
733  * BioSrcDescData structures.
734  */
CopyBioSrcDescPtr(BioSrcDescPtr orig)735 static BioSrcDescPtr CopyBioSrcDescPtr (
736   BioSrcDescPtr orig
737 )
738 {
739   BioSrcDescPtr new_bsdp_start;
740 
741   if (orig == NULL) return NULL;
742 
743   new_bsdp_start = (BioSrcDescPtr) MemNew (sizeof (BioSrcDescData));
744   if (new_bsdp_start == NULL) return NULL;
745 
746   new_bsdp_start->biop = orig->biop;
747   new_bsdp_start->strings = CopyStrings (orig->strings);
748   new_bsdp_start->next = CopyBioSrcDescPtr (orig->next);
749   return new_bsdp_start;
750 }
751 
752 //Not part of Autodef or Cleanup
753 /* The FreeBioSrcDescPtr function frees the memory associated with a
754  * linked list of BioSrcDescData structures.
755  */
FreeBioSrcDescPtr(BioSrcDescPtr bsdp)756 static void FreeBioSrcDescPtr (
757   BioSrcDescPtr bsdp
758 )
759 {
760   if (bsdp == NULL) return;
761   FreeBioSrcDescPtr (bsdp->next);
762   bsdp->biop = NULL;
763   ValNodeFreeData (bsdp->strings);
764   MemFree (bsdp);
765 }
766 
767 
768 //Not part of Autodef or Cleanup
769 /* The AddQualToBioSrcDescPtr function finds the qualifier at the
770  * feature_index position in the DefLineModifiers array in the
771  * BioSourcePtr and adds the value for that modifier to the array
772  * of strings describing the bio source.
773  */
AddQualToBioSrcDescPtr(BioSrcDescPtr bsdp,ModifierItemLocalPtr qual,Int2 feature_index)774 static void AddQualToBioSrcDescPtr (
775   BioSrcDescPtr bsdp,
776   ModifierItemLocalPtr qual,
777   Int2 feature_index
778 )
779 {
780   OrgModPtr          mod;
781   SubSourcePtr       ssp;
782   CharPtr            tmp;
783 
784   if (bsdp == NULL) return;
785   if (bsdp->biop == NULL) return;
786 
787   if (DefLineModifiers[feature_index].isOrgMod)
788   {
789     if (bsdp->biop->org == NULL || bsdp->biop->org->orgname == NULL) return;
790     mod = bsdp->biop->org->orgname->mod;
791     while (mod != NULL
792         && mod->subtype != DefLineModifiers[feature_index].subtype)
793     {
794       mod = mod->next;
795     }
796     if (mod != NULL && mod->subname != NULL)
797     {
798       if (mod->subtype == ORGMOD_specimen_voucher && StringNICmp (mod->subname, "personal:", 9) == 0)
799       {
800         tmp = mod->subname + 9;
801         while (isspace (*tmp))
802         {
803           tmp++;
804         }
805       }
806       else
807       {
808         tmp = mod->subname;
809       }
810       ValNodeCopyStr( &(bsdp->strings), 0, tmp);
811     }
812   } else {
813     ssp = bsdp->biop->subtype;
814     while (ssp != NULL
815         && ssp->subtype != DefLineModifiers[feature_index].subtype)
816     {
817       ssp = ssp->next;
818     }
819     if (ssp != NULL)
820     {
821       if (ssp->subtype == SUBSRC_transgenic)
822       {
823         ValNodeCopyStr( &(bsdp->strings), 0, "transgenic");
824       }
825       else if (ssp->name != NULL)
826       {
827         ValNodeCopyStr( &(bsdp->strings), 0, ssp->name);
828       }
829     }
830   }
831 }
832 
833 //Not part of Autodef or Cleanup
834 /* The CompareOrganismDescriptors function compares the contents of the
835  * lists of strings for each BioSrcDesc item.
836  * The function returns:
837  *     -1 if org1 < org2
838  *      0 if org1 = org2
839  *      1 if org1 > org2
840  */
CompareOrganismDescriptors(BioSrcDescPtr org1,BioSrcDescPtr org2)841 static int CompareOrganismDescriptors (
842   BioSrcDescPtr org1,
843   BioSrcDescPtr org2
844 )
845 {
846   ValNodePtr vnp1, vnp2;
847   int cmpval;
848 
849   vnp1 = org1->strings;
850   vnp2 = org2->strings;
851 
852   while (vnp1 != NULL && vnp2 != NULL)
853   {
854     cmpval = StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
855     if (cmpval != 0) return cmpval;
856 
857     vnp1 = vnp1->next;
858     vnp2 = vnp2->next;
859   }
860   if (vnp1 == NULL && vnp2 == NULL)
861   {
862     return 0;
863   }
864   else if (vnp1 != NULL && vnp2 == NULL)
865   {
866     return 1;
867   }
868   else
869   {
870     return -1;
871   }
872 }
873 
874 //Not part of Autodef or Cleanup
875 /* The OrgGroupData structure contains a list of BioSrcDescData items
876  * for which the contents of the descriptive strings list are identical,
877  * i.e., all the organisms in the group would have the same description
878  * if you used the modifiers used to generate this list of strings.
879  * The structure also contains the number of organisms in the list
880  * so that it will be easy to tell that the OrgGroup now contains a
881  * single organism with a unique description.
882  */
883 
884 /* The CopyOrgGroupList function creates a copy of the list of OrgGroups */
CopyOrgGroupList(OrgGroupPtr orig)885 static OrgGroupPtr CopyOrgGroupList (
886   OrgGroupPtr orig
887 )
888 {
889   OrgGroupPtr new_ogp_start = NULL, new_ogp;
890 
891   if (orig == NULL) return NULL;
892 
893   new_ogp_start = (OrgGroupPtr) MemNew (sizeof (OrgGroupData));
894   if (new_ogp_start == NULL) return NULL;
895 
896   new_ogp_start->num_organisms = orig->num_organisms;
897   new_ogp_start->org_list = CopyBioSrcDescPtr (orig->org_list);
898   new_ogp_start->next = NULL;
899   orig = orig->next;
900   new_ogp = new_ogp_start;
901   while (orig != NULL) {
902     new_ogp->next = (OrgGroupPtr) MemNew (sizeof (OrgGroupData));
903     new_ogp = new_ogp->next;
904     new_ogp->num_organisms = orig->num_organisms;
905     new_ogp->org_list = CopyBioSrcDescPtr (orig->org_list);
906     new_ogp->next = NULL;
907     orig = orig->next;
908   }
909 
910   return new_ogp_start;
911 }
912 
913 //Not part of Autodef or Cleanup
914 /* The FreeOrgGroupPtr function frees the memory associated with a
915  * list of OrgGroups */
FreeOrgGroupPtr(OrgGroupPtr ogp)916 static void FreeOrgGroupPtr (
917   OrgGroupPtr ogp
918 )
919 {
920   OrgGroupPtr ogp_next;
921 
922   while (ogp != NULL) {
923     ogp_next = ogp->next;
924     FreeBioSrcDescPtr (ogp->org_list);
925     ogp = MemFree (ogp);
926     ogp = ogp_next;
927   }
928   return;
929 }
930 
931 //Not part of Autodef or Cleanup
932 /* The ReorderGroupOrgs function sorts the OrgGroup list based on the results
933  * of the CompareOrganismDescriptors function.
934  */
ReorderGroupOrgs(OrgGroupPtr this_group)935 static void ReorderGroupOrgs (
936   OrgGroupPtr this_group
937 )
938 {
939   BioSrcDescPtr bsdp;
940   BioSrcDescPtr nextBsdp;
941   BioSrcDescPtr prevBsdp;
942   Boolean swap_needed = TRUE;
943 
944   if (this_group->org_list == NULL) return;
945   if (this_group->org_list->next == NULL) return;
946 
947   while (swap_needed)
948   {
949     swap_needed = FALSE;
950     bsdp = this_group->org_list;
951     prevBsdp = NULL;
952     while (bsdp->next != NULL)
953     {
954       nextBsdp = bsdp->next;
955       if (CompareOrganismDescriptors (bsdp, nextBsdp) > 0)
956       {
957         swap_needed = TRUE;
958         bsdp->next = nextBsdp->next;
959         nextBsdp->next = bsdp;
960         if (prevBsdp == NULL)
961         {
962           this_group->org_list = nextBsdp;
963         }
964         else
965         {
966           prevBsdp->next = nextBsdp;
967         }
968         prevBsdp = nextBsdp;
969       }
970       else
971       {
972         prevBsdp = bsdp;
973         bsdp = bsdp->next;
974       }
975     }
976   }
977 }
978 
979 //Not part of Autodef or Cleanup
980 /* The ReGroupOrgs function operates on a single OrgGroup item.
981  * If any of the BioSrcDesc items in the group now have different
982  * descriptions, the function breaks it up into smaller, homogenous OrgGroups.
983  */
ReGroupOrgs(OrgGroupPtr this_group)984 static void ReGroupOrgs (
985   OrgGroupPtr this_group
986 )
987 {
988   BioSrcDescPtr bsdp;
989   OrgGroupPtr new_group;
990   int num_organisms;
991 
992   if (this_group == NULL) return;
993   bsdp = this_group->org_list;
994   if (bsdp == NULL) return;
995   num_organisms = 0;
996   while (bsdp->next != NULL)
997   {
998     num_organisms ++;
999     if (CompareOrganismDescriptors (bsdp, bsdp->next) != 0)
1000     {
1001       /* create new group to hold next set of organisms */
1002       new_group = (OrgGroupPtr) MemNew (sizeof (OrgGroupData));
1003       if (new_group == NULL) return;
1004       new_group->org_list = bsdp->next;
1005       new_group->num_organisms = this_group->num_organisms - num_organisms;
1006       new_group->next = this_group->next;
1007       this_group->next = new_group;
1008       this_group->num_organisms = num_organisms;
1009       bsdp->next = NULL;
1010       ReGroupOrgs (new_group);
1011     }
1012     else
1013     {
1014       bsdp = bsdp->next;
1015     }
1016   }
1017 }
1018 
1019 //Not part of Autodef or Cleanup
1020 /* The AddQualToGroup function operates on a single OrgGroup item.
1021  * The function adds a qualifier to each BioSrcDesc item in the OrgGroup,
1022  * breaks the group into multiple groups if the group is no longer
1023  * homogenous, and sorts the new list.
1024  */
AddQualToGroup(OrgGroupPtr this_group,ModifierItemLocalPtr qual,Int2 feature_index)1025 static void AddQualToGroup (
1026   OrgGroupPtr this_group,
1027   ModifierItemLocalPtr qual,
1028   Int2 feature_index
1029 )
1030 {
1031   BioSrcDescPtr bsdp;
1032 
1033   if (this_group == NULL) return;
1034 
1035   bsdp = this_group->org_list;
1036   while (bsdp != NULL)
1037   {
1038     AddQualToBioSrcDescPtr (bsdp, qual, feature_index);
1039     bsdp= bsdp->next;
1040   }
1041 
1042   /* now reorder organisms and break up group */
1043   ReorderGroupOrgs (this_group);
1044 
1045   ReGroupOrgs (this_group);
1046 }
1047 
1048 //Not part of Autodef or Cleanup
1049 /* The AddQualToGroupList function operates on a list of OrgGroup items.
1050  * It calls AddQualToGroup for each item in the list.
1051  */
AddQualToGroupList(OrgGroupPtr group_list,ModifierItemLocalPtr qual,Int2 feature_index)1052 static void AddQualToGroupList (
1053   OrgGroupPtr group_list,
1054   ModifierItemLocalPtr qual,
1055   Int2 feature_index
1056 )
1057 {
1058   OrgGroupPtr ogp;
1059 
1060   ogp = group_list;
1061   while (ogp != NULL)
1062   {
1063     AddQualToGroup (ogp, qual, feature_index);
1064     ogp = ogp->next;
1065   }
1066 }
1067 
1068 //Not part of Autodef or Cleanup
1069 /* The CopyModifierIndices function creates a new ValNode list with the
1070  * same data.intvalue values for each node as the original modifier_indices
1071  * ValNode list.
1072  */
CopyModifierIndices(ValNodePtr modifier_indices)1073 static ValNodePtr CopyModifierIndices (
1074   ValNodePtr modifier_indices
1075 )
1076 {
1077   ValNodePtr new_indices;
1078 
1079   if (modifier_indices == NULL) return NULL;
1080   new_indices = ValNodeNew (NULL);
1081   if (new_indices == NULL) return NULL;
1082   new_indices->choice = modifier_indices->choice;
1083   new_indices->data.intvalue = modifier_indices->data.intvalue;
1084   new_indices->next = CopyModifierIndices (modifier_indices->next);
1085   return new_indices;
1086 }
1087 
1088 //Not part of Autodef or Cleanup
1089 /* The CopyModifierCombo creates a copy of a ModificationCombination item.
1090  * This includes creating a copy of the number and list of modifiers
1091  * and a copy of the number and list of OrgGroups, as well as copying the
1092  * maximum number of organisms in any one group and the number of unique
1093  * organism descriptions produced by this combination of modifiers.
1094  */
CopyModifierCombo(ModifierCombinationPtr m)1095 static ModifierCombinationPtr CopyModifierCombo (
1096   ModifierCombinationPtr m
1097 )
1098 {
1099   ModifierCombinationPtr newm;
1100   ValNodePtr  vnp;
1101   ValNodePtr  newval;
1102 
1103   newm = (ModifierCombinationPtr) MemNew (sizeof (ModifierCombinationData));
1104   if (newm == NULL) return NULL;
1105 
1106   newm->next = NULL;
1107 
1108   /* copy list of modifier indices */
1109   newm->num_mods = m->num_mods;
1110   newm->modifier_indices = NULL;
1111   vnp = m->modifier_indices;
1112   if (vnp != NULL)
1113   {
1114     newm->modifier_indices = ValNodeNew (NULL);
1115     if (newm->modifier_indices == NULL) return NULL;
1116     newm->modifier_indices->data.intvalue = vnp->data.intvalue;
1117     vnp = vnp->next;
1118     while (vnp != NULL)
1119     {
1120       newval = ValNodeNew (newm->modifier_indices);
1121       if (newval == NULL) return NULL;
1122       newval->data.intvalue = vnp->data.intvalue;
1123       vnp = vnp->next;
1124     }
1125   }
1126 
1127   /* copy groups */
1128   newm->num_groups = m->num_groups;
1129   newm->group_list = CopyOrgGroupList (m->group_list);
1130 
1131   return newm;
1132 }
1133 
1134 //Not part of Autodef or Cleanup
1135 /* This function creates a new ModifierCombination item using the supplied
1136  * OrgGroup list.  It calculates the number of groups, maximum number of
1137  * organisms in any one group, and number of unique organisms.
1138  * Initially there are no modifiers.
1139  */
NewModifierCombo(OrgGroupPtr group_list)1140 static ModifierCombinationPtr NewModifierCombo (
1141   OrgGroupPtr group_list
1142 )
1143 {
1144   ModifierCombinationPtr newm;
1145   OrgGroupPtr  ogp;
1146 
1147   newm = (ModifierCombinationPtr) MemNew (sizeof (ModifierCombinationData));
1148   if (newm == NULL) return NULL;
1149 
1150   newm->num_mods = 0;
1151   newm->modifier_indices = NULL;
1152   newm->num_unique_orgs = 0;
1153 
1154   /* copy groups */
1155   newm->group_list = CopyOrgGroupList (group_list);
1156 
1157   ogp = newm->group_list;
1158   newm->max_orgs_in_group = 0;
1159   newm->num_groups = 0;
1160   while (ogp != NULL)
1161   {
1162     if (newm->max_orgs_in_group < ogp->num_organisms)
1163       newm->max_orgs_in_group = ogp->num_organisms;
1164     if (ogp->num_organisms == 1)
1165       newm->num_unique_orgs ++;
1166     newm->num_groups ++;
1167     ogp = ogp->next;
1168   }
1169 
1170   newm->next = NULL;
1171   return newm;
1172 }
1173 
1174 //Not part of Autodef or Cleanup
1175 /* This function frees the memory associated with a list of
1176  * ModifierCombination items.
1177  */
FreeModifierCombo(ModifierCombinationPtr m)1178 static void FreeModifierCombo (
1179   ModifierCombinationPtr m
1180 )
1181 {
1182   if (m == NULL) return;
1183   FreeModifierCombo (m->next);
1184   ValNodeFree (m->modifier_indices);
1185   FreeOrgGroupPtr (m->group_list);
1186   MemFree (m);
1187 }
1188 
1189 //Not part of Autodef or Cleanup
1190 /* This function adds the qualifier at the feature_index position in the
1191  * DefLineModifiers array to each OrgGroup in the list and recalculates
1192  * the maximum number of organisms in any one group and the number of
1193  * unique organism descriptions generated by this new combination of
1194  * modifiers.
1195  */
AddQualToModifierCombo(ModifierCombinationPtr m,ModifierItemLocalPtr qual,Int2 feature_index)1196 static void AddQualToModifierCombo (
1197   ModifierCombinationPtr m,
1198   ModifierItemLocalPtr qual,
1199   Int2 feature_index
1200 )
1201 {
1202   OrgGroupPtr ogp;
1203   ValNodePtr vnp;
1204 
1205   if (m == NULL) return;
1206 
1207   /* now try adding the modifier, see if the number of groups goes up */
1208   /* if the number of organisms in each group is one, we can stop */
1209   vnp = ValNodeNew (m->modifier_indices);
1210   if (vnp == NULL) return;
1211   if (m->modifier_indices == NULL)
1212   {
1213     m->modifier_indices = vnp;
1214   }
1215   vnp->data.intvalue = feature_index;
1216   m->num_mods ++;
1217   AddQualToGroupList (m->group_list, qual, feature_index);
1218   ogp = m->group_list;
1219   m->max_orgs_in_group = 0;
1220   m->num_unique_orgs = 0;
1221   m->num_groups = 0;
1222   while (ogp != NULL)
1223   {
1224     if (m->max_orgs_in_group < ogp->num_organisms)
1225       m->max_orgs_in_group = ogp->num_organisms;
1226     if (ogp->num_organisms == 1)
1227       m->num_unique_orgs ++;
1228     m->num_groups ++;
1229     ogp = ogp->next;
1230   }
1231 }
1232 
1233 
1234 //Not part of Autodef or Cleanup
1235 /* This function creates the initial OrgGroup list that is copied for every
1236  * ModifierCombination item.
1237  */
BuildTaxOrgGroupList(BioSourcePtr biop,Pointer userdata)1238 static void BuildTaxOrgGroupList (
1239   BioSourcePtr biop,
1240   Pointer userdata
1241 )
1242 {
1243   OrgGroupPtr   ogp;
1244   OrgGroupPtr   prevOgp;
1245   OrgGroupPtr PNTR pogp;
1246   BioSrcDescPtr newBsdp;
1247   OrgRefPtr     orp;
1248   int cmpval;
1249 
1250   pogp = (OrgGroupPtr PNTR) userdata;
1251   ogp = *pogp;
1252 
1253   newBsdp = (BioSrcDescPtr) MemNew (sizeof (BioSrcDescData));
1254   if (newBsdp == NULL) return;
1255   newBsdp->biop = biop;
1256   newBsdp->next = NULL;
1257   newBsdp->strings = NULL;
1258 
1259   /* add tax name as first string */
1260   /* later, move this into a separate function and add special handling */
1261   orp = biop->org;
1262   if (orp != NULL && orp->taxname != NULL)
1263   {
1264     ValNodeCopyStr (&(newBsdp->strings), 0, orp->taxname);
1265   }
1266 
1267   prevOgp = NULL;
1268   cmpval = -1;
1269   while (ogp != NULL && cmpval < 0)
1270   {
1271     if (ogp->org_list != NULL)
1272     {
1273       cmpval = CompareOrganismDescriptors (ogp->org_list, newBsdp);
1274       if (cmpval == 0)
1275       {
1276         newBsdp->next = ogp->org_list;
1277         ogp->org_list = newBsdp;
1278         ogp->num_organisms ++;
1279       }
1280     }
1281     if (cmpval < 0)
1282     {
1283       prevOgp = ogp;
1284       ogp = ogp->next;
1285     }
1286   }
1287   if (cmpval != 0)
1288   {
1289     /* create new group */
1290     ogp = (OrgGroupPtr) MemNew (sizeof (OrgGroupData));
1291     if (ogp == NULL) return;
1292     ogp->org_list = newBsdp;
1293     ogp->num_organisms = 1;
1294     ogp->next = NULL;
1295     if (prevOgp == NULL)
1296     {
1297       ogp->next = *pogp;
1298       *pogp = ogp;
1299     }
1300     else
1301     {
1302       ogp->next = prevOgp->next;
1303       prevOgp->next = ogp;
1304     }
1305   }
1306 }
1307 
1308 typedef struct bestsortdata {
1309   Int4    feature_index;
1310   Boolean all_unique;
1311   Boolean all_present;
1312   Boolean is_unique;
1313 } BestSortData, PNTR BestSortPtr;
1314 
1315 //Not part of Autodef or Cleanup
Index1FoundBeforeIndex2(Int4 index1,Int4 index2,ValNodePtr list)1316 static Boolean Index1FoundBeforeIndex2 (
1317   Int4 index1,
1318   Int4 index2,
1319   ValNodePtr list
1320 )
1321 {
1322   ValNodePtr  vnp;
1323   BestSortPtr bsp;
1324   for (vnp = list; vnp != NULL; vnp = vnp->next)
1325   {
1326     if ((bsp = vnp->data.ptrvalue) == NULL)
1327     {
1328       continue;
1329     }
1330     if (bsp->feature_index == index1) return TRUE;
1331     if (bsp->feature_index == index2) return FALSE;
1332   }
1333   return FALSE;
1334 }
1335 
1336 //Not part of Autodef or Cleanup
1337 /* This function determines whether or not we should try adding this modifier
1338  * to our combination.  If we've already tried it and not added it to the list,
1339  * there's no reason to try adding it again.
1340  */
OkToTryAddingQual(ModifierCombinationPtr m,ModifierItemLocalPtr ItemList,ValNodePtr available_modifiers_list,Int2 feature_index)1341 static Boolean OkToTryAddingQual (
1342   ModifierCombinationPtr m,
1343   ModifierItemLocalPtr ItemList,
1344   ValNodePtr           available_modifiers_list,
1345   Int2 feature_index
1346 )
1347 {
1348   ValNodePtr vnp;
1349 
1350   /* if feature_index indicates a value we don't use for best combos, skip */
1351   if (feature_index == DEFLINE_POS_Map)
1352   {
1353     return FALSE;
1354   }
1355 
1356   if (m == NULL) return TRUE;
1357 
1358   /* if feature_index is lower than anything else on list (other than */
1359   /* a required value, this is a repeat combination, so skip it */
1360   vnp = m->modifier_indices;
1361   while (vnp != NULL)
1362   {
1363     if (feature_index == m->modifier_indices->data.intvalue)
1364       return FALSE;
1365     if (! ItemList[m->modifier_indices->data.intvalue].required &&
1366       Index1FoundBeforeIndex2 (feature_index,
1367                                m->modifier_indices->data.intvalue,
1368                                available_modifiers_list))
1369     {
1370       return FALSE;
1371     }
1372     vnp = vnp->next;
1373   }
1374   return TRUE;
1375 }
1376 
1377 
1378 //Not part of Autodef or Cleanup
GetListOfAvailableModifiers(ModifierItemLocalPtr ItemList)1379 static ValNodePtr GetListOfAvailableModifiers ( ModifierItemLocalPtr ItemList)
1380 {
1381   ValNodePtr  vnp, head;
1382   Int2        feature_index;
1383   BestSortPtr bsp;
1384 
1385   head = NULL;
1386   for (feature_index = 0; feature_index < numDefLineModifiers; feature_index++)
1387   {
1388     if ( ItemList[feature_index].any_present)
1389     {
1390       bsp = (BestSortPtr) MemNew (sizeof (BestSortData));
1391       if (bsp == NULL) return NULL;
1392       bsp->feature_index = feature_index;
1393       bsp->all_unique = ItemList[feature_index].all_unique;
1394       bsp->all_present = ItemList[feature_index].all_present;
1395       bsp->is_unique = ItemList[feature_index].is_unique;
1396       vnp = ValNodeNew (head);
1397       if (vnp == NULL) return NULL;
1398       vnp->data.ptrvalue = bsp;
1399       if (head == NULL) head = vnp;
1400     }
1401   }
1402   return head;
1403 }
1404 
1405 static Int4 DefLineQualSortOrder [] = {
1406   DEFLINE_POS_Transgenic,
1407   DEFLINE_POS_Plasmid_name,
1408   DEFLINE_POS_Endogenous_virus_name,
1409   DEFLINE_POS_Strain,
1410   DEFLINE_POS_Clone,
1411   DEFLINE_POS_Isolate,
1412   DEFLINE_POS_Haplotype,
1413   DEFLINE_POS_Cultivar,
1414   DEFLINE_POS_Specimen_voucher,
1415   DEFLINE_POS_Ecotype,
1416   DEFLINE_POS_Serotype,
1417   DEFLINE_POS_Breed
1418 };
1419 
1420 //Not part of Autodef or Cleanup
SortByImportanceAndPresence(VoidPtr ptr1,VoidPtr ptr2)1421 static int LIBCALLBACK SortByImportanceAndPresence (
1422   VoidPtr ptr1,
1423   VoidPtr ptr2
1424 )
1425 {
1426   ValNodePtr  vnp1;
1427   ValNodePtr  vnp2;
1428   BestSortPtr bsp1, bsp2;
1429   Int4       num_defline_qual_sort_order, index;
1430 
1431   if (ptr1 == NULL && ptr2 == NULL) return 0;
1432 
1433   if (ptr1 == NULL && ptr2 != NULL) return -1;
1434   if (ptr1 != NULL && ptr2 == NULL) return 1;
1435 
1436   vnp1 = *((ValNodePtr PNTR) ptr1);
1437   vnp2 = *((ValNodePtr PNTR) ptr2);
1438   if (vnp1 == NULL || vnp2 == NULL) return 0;
1439   if (vnp1->data.ptrvalue == NULL || vnp2->data.ptrvalue == NULL) return 0;
1440 
1441   bsp1 = vnp1->data.ptrvalue;
1442   bsp2 = vnp2->data.ptrvalue;
1443   if (bsp1->feature_index == bsp2->feature_index) return 0;
1444 
1445   if (bsp1->all_present && bsp1->all_unique
1446     && (! bsp2->all_present || ! bsp2->all_unique))
1447   {
1448     return -1;
1449   }
1450   if (bsp2->all_present && bsp2->all_unique
1451     && (! bsp1->all_present || ! bsp1->all_unique))
1452   {
1453     return 1;
1454   }
1455 
1456   if ( ! bsp1->is_unique && bsp2->is_unique) return -1;
1457   if ( ! bsp2->is_unique && bsp1->is_unique) return 1;
1458 
1459   num_defline_qual_sort_order = sizeof (DefLineQualSortOrder) / sizeof (Int4);
1460   for (index = 0; index < num_defline_qual_sort_order; index++)
1461   {
1462     if (bsp1->feature_index == DefLineQualSortOrder [ index ]) return -1;
1463     if (bsp2->feature_index == DefLineQualSortOrder [ index ]) return 1;
1464   }
1465 
1466   if (bsp1->feature_index > bsp2->feature_index) return 1;
1467   if (bsp1->feature_index < bsp2->feature_index) return -1;
1468   return 0;
1469 }
1470 
1471 
1472 //Not part of Autodef or Cleanup
1473 /* The function FindBestCombo tries to find the best combination of modifiers
1474  * to create unique organism descriptions.  This is accomplished by
1475  * creating a list of required modifiers, and then creating a list of
1476  * combinations of modifiers by adding modifiers one at a time
1477  * to see if the additional modifiers provide any more differentiation in
1478  * the list.
1479  * In order to do this, I start with a list of required modifiers, and
1480  * then create copies of this list.  For each copy I add one of the modifiers
1481  * that are present in the bio sources and not already on the list.
1482  * If adding the modifier increases the differentiation, I add that copy to
1483  * the list of possible combinations, otherwise I discard it.
1484  * The function then makes copies of all of the new items added to the list,
1485  * starting with the item pointed to by start_of_expand, and adds another
1486  * modifier to each combination, keeping the combinations that increase
1487  * the differentiation and discarding the rest.
1488  * This process continues until I have a combination that produces completely
1489  * differentiated bio sources, or I run out of possible combinations.
1490  * If the list of possible combinations is exhausted before each organism
1491  * has a unique description, the function selects the combination from the
1492  * list with the largest number of unique organism descriptions.  If more
1493  * than one combination produces the largest number of unique organisms,
1494  * the combination with the largest number of unique organisms and the
1495  * largest number of groups will be selected.
1496  */
FindBestCombo(SeqEntryPtr sep,ModifierItemLocalPtr ItemList)1497 static ModifierCombinationPtr FindBestCombo(
1498   SeqEntryPtr sep,
1499   ModifierItemLocalPtr ItemList
1500 )
1501 {
1502   OrgGroupPtr group_list;
1503   ModifierCombinationPtr mc_list, start_of_expand, best_found, end_of_list;
1504   ModifierCombinationPtr next_start_of_expand, m, newm;
1505   Int4 num_to_expand, next_num_to_expand;
1506   Int2 i;
1507   ValNodePtr available_modifier_list, vnp;
1508   BestSortPtr bsp;
1509 
1510   best_found = NULL;
1511 
1512   /* first, get list of organisms */
1513   group_list = NULL;
1514   VisitBioSourcesInSep (sep, &group_list, BuildTaxOrgGroupList);
1515 
1516   /* create combo with just the org groups */
1517   mc_list = NewModifierCombo (group_list);
1518   if (mc_list == NULL) return NULL;
1519 
1520   available_modifier_list = GetListOfAvailableModifiers (ItemList);
1521 
1522   /* next, add in any required qualifiers */
1523   for (vnp = available_modifier_list; vnp != NULL; vnp = vnp->next)
1524   {
1525     bsp = vnp->data.ptrvalue;
1526     if (bsp == NULL) return NULL;
1527     if (ItemList[bsp->feature_index].required)
1528     {
1529       AddQualToModifierCombo (mc_list, ItemList + bsp->feature_index,
1530                                        bsp->feature_index);
1531     }
1532   }
1533   if (mc_list->max_orgs_in_group == 1)
1534   {
1535     /* we're done - they're all unique */
1536     best_found = mc_list;
1537     return best_found;
1538   }
1539 
1540   available_modifier_list = ValNodeSort (available_modifier_list,
1541                                          SortByImportanceAndPresence);
1542   start_of_expand = mc_list;
1543   end_of_list = mc_list;
1544   num_to_expand = 1;
1545   while (best_found == NULL && start_of_expand != NULL)
1546   {
1547     next_num_to_expand = 0;
1548     next_start_of_expand = NULL;
1549     for (i=0; i < num_to_expand && start_of_expand != NULL; i++)
1550     {
1551       /* try adding qualifiers */
1552       for (vnp = available_modifier_list;
1553            vnp != NULL && best_found == NULL;
1554            vnp = vnp->next)
1555       {
1556         bsp = vnp->data.ptrvalue;
1557         if (bsp == NULL) return NULL;
1558         if (OkToTryAddingQual (start_of_expand, ItemList,
1559                                available_modifier_list,
1560                                bsp->feature_index))
1561         {
1562           newm = CopyModifierCombo (start_of_expand);
1563           AddQualToModifierCombo (newm, ItemList + bsp->feature_index,
1564                                   bsp->feature_index);
1565           if (start_of_expand->num_groups >= newm->num_groups)
1566           {
1567             /* situation didn't get better, don't bother to add this one */
1568             FreeModifierCombo (newm);
1569             newm = NULL;
1570           }
1571           else if (newm->max_orgs_in_group == 1)
1572           {
1573             best_found = newm;
1574           }
1575           else
1576           {
1577             end_of_list->next = newm;
1578             end_of_list = end_of_list->next;
1579             if (next_start_of_expand == NULL)
1580               next_start_of_expand = newm;
1581             next_num_to_expand++;
1582           }
1583         }
1584       }
1585       if (start_of_expand != NULL)
1586       {
1587         start_of_expand = start_of_expand->next;
1588       }
1589     }
1590     num_to_expand = next_num_to_expand;
1591     if (start_of_expand != NULL)
1592     {
1593       start_of_expand = start_of_expand->next;
1594     }
1595   }
1596 
1597   if (best_found != NULL)
1598   {
1599     FreeModifierCombo (mc_list);
1600     return best_found;
1601   }
1602 
1603   /* we want to find the one with the highest number of unique organisms */
1604   best_found = mc_list;
1605   m = mc_list->next;
1606   while (m!= NULL)
1607   {
1608     if (m->num_unique_orgs > best_found->num_unique_orgs)
1609     {
1610       best_found = m;
1611     }
1612     else if (m->num_unique_orgs == best_found->num_unique_orgs
1613            && m->num_groups > best_found->num_groups)
1614     {
1615       best_found = m;
1616     }
1617     else if (m->num_unique_orgs == best_found->num_unique_orgs
1618            && m->num_groups == best_found->num_groups
1619            && m->num_mods < best_found->num_mods)
1620     {
1621       best_found = m;
1622     }
1623     m = m->next;
1624   }
1625 
1626   m = mc_list;
1627   while (m != NULL)
1628   {
1629     if (m != best_found)
1630     {
1631       newm = m->next;
1632       m->next = NULL;
1633       FreeModifierCombo (m);
1634       m = newm;
1635     }
1636     else
1637     {
1638       FreeModifierCombo (m->next);
1639       m->next = NULL;
1640       m = NULL;
1641     }
1642   }
1643   return best_found;
1644 }
1645 
1646 
1647 //Not part of Autodef or Cleanup
1648 /* create combo with the specified modifiers */
GetModifierIndicesFromModList(ModifierItemLocalPtr modList)1649 NLM_EXTERN ValNodePtr GetModifierIndicesFromModList (
1650   ModifierItemLocalPtr modList
1651 )
1652 {
1653   Int4       feature_index;
1654   ValNodePtr modifier_indices = NULL;
1655 
1656   if (modList == NULL) return NULL;
1657   for (feature_index = 0; feature_index < numDefLineModifiers; feature_index++)
1658   {
1659     if (modList[feature_index].any_present && modList [feature_index].required)
1660     {
1661       ValNodeAddInt (&modifier_indices, 0, feature_index);
1662     }
1663   }
1664   return modifier_indices;
1665 }
1666 //LCOV_EXCL_STOP
1667 
1668 
1669 /* This is the callback function for sorting the modifier list.  It
1670  * implements an order specified by the indexers.
1671  */
1672 static Int4 DefLineQualPresentationOrder [] = {
1673   DEFLINE_POS_Transgenic,
1674   DEFLINE_POS_Strain,
1675   DEFLINE_POS_Isolate,
1676   DEFLINE_POS_Cultivar,
1677   DEFLINE_POS_Specimen_voucher,
1678   DEFLINE_POS_Ecotype,
1679   DEFLINE_POS_Serotype,
1680   DEFLINE_POS_Breed
1681 };
1682 
SortByImportance(VoidPtr ptr1,VoidPtr ptr2)1683 static int LIBCALLBACK SortByImportance (
1684   VoidPtr ptr1,
1685   VoidPtr ptr2
1686 )
1687 {
1688   ValNodePtr vnp1;
1689   ValNodePtr vnp2;
1690   Int4       num_defline_qual_sort_order, index;
1691 
1692   if (ptr1 == NULL && ptr2 == NULL) return 0;
1693 
1694   if (ptr1 == NULL && ptr2 != NULL) return -1;
1695   if (ptr1 != NULL && ptr2 == NULL) return 1;
1696 
1697   vnp1 = *((ValNodePtr PNTR) ptr1);
1698   vnp2 = *((ValNodePtr PNTR) ptr2);
1699   if (vnp1 == NULL || vnp2 == NULL) return 0;
1700   if (vnp1->data.intvalue == vnp2->data.intvalue) return 0;
1701 
1702   num_defline_qual_sort_order = sizeof (DefLineQualPresentationOrder) / sizeof (Int4);
1703   for (index = 0; index < num_defline_qual_sort_order; index++)
1704   {
1705     if (vnp1->data.intvalue == DefLineQualPresentationOrder [ index ]) return -1;
1706     if (vnp2->data.intvalue == DefLineQualPresentationOrder [ index ]) return 1;
1707   }
1708 
1709   if ((vnp1->data.intvalue < 0 || vnp1->data.intvalue > numDefLineModifiers)
1710     && (vnp2->data.intvalue < 0 || vnp2->data.intvalue > numDefLineModifiers))
1711   {
1712     return 0;
1713   }
1714   if (vnp1->data.intvalue < 0 || vnp1->data.intvalue > numDefLineModifiers)
1715   {
1716     return 1;
1717   }
1718   if (vnp2->data.intvalue < 0 || vnp2->data.intvalue > numDefLineModifiers)
1719   {
1720     return -1;
1721   }
1722 
1723   if (DefLineModifiers [ vnp1->data.intvalue].isOrgMod
1724     && (! DefLineModifiers [ vnp2->data.intvalue].isOrgMod
1725       || vnp2->data.intvalue == DEFLINE_POS_Plasmid_name
1726       || vnp2->data.intvalue == DEFLINE_POS_Endogenous_virus_name))
1727   {
1728     return -1;
1729   }
1730   if (DefLineModifiers [ vnp2->data.intvalue].isOrgMod
1731     && (! DefLineModifiers [ vnp1->data.intvalue].isOrgMod
1732       || vnp1->data.intvalue == DEFLINE_POS_Plasmid_name
1733       || vnp1->data.intvalue == DEFLINE_POS_Endogenous_virus_name))
1734   {
1735     return 1;
1736   }
1737 
1738   if (vnp1->data.intvalue == DEFLINE_POS_Plasmid_name)
1739   {
1740     return -1;
1741   }
1742   if (vnp2->data.intvalue == DEFLINE_POS_Plasmid_name)
1743   {
1744     return 1;
1745   }
1746 
1747   if (vnp1->data.intvalue == DEFLINE_POS_Endogenous_virus_name)
1748   {
1749     return -1;
1750   }
1751   if (vnp2->data.intvalue == DEFLINE_POS_Endogenous_virus_name)
1752   {
1753     return 1;
1754   }
1755 
1756   if (! DefLineModifiers [ vnp1->data.intvalue].isOrgMod
1757      && vnp2->data.intvalue == DEFLINE_POS_Clone)
1758   {
1759     return 1;
1760   }
1761   if (! DefLineModifiers [ vnp2->data.intvalue].isOrgMod
1762      && vnp1->data.intvalue == DEFLINE_POS_Clone)
1763   {
1764     return -1;
1765   }
1766 
1767   if (! DefLineModifiers [ vnp1->data.intvalue].isOrgMod
1768      && vnp2->data.intvalue == DEFLINE_POS_Haplotype)
1769   {
1770     return 1;
1771   }
1772   if (! DefLineModifiers [ vnp2->data.intvalue].isOrgMod
1773      && vnp1->data.intvalue == DEFLINE_POS_Haplotype)
1774   {
1775     return -1;
1776   }
1777 
1778   if (vnp1->data.intvalue > vnp2->data.intvalue) return 1;
1779   if (vnp1->data.intvalue < vnp2->data.intvalue) return -1;
1780   return 0;
1781 }
1782 
RecordHasModifier(BioSourcePtr biop,Int4 modifier_index)1783 static Boolean RecordHasModifier (
1784   BioSourcePtr biop,
1785   Int4         modifier_index
1786 )
1787 {
1788   OrgModPtr     mod;
1789   OrgNamePtr    onp;
1790   SubSourcePtr  ssp;
1791 
1792   if (biop == NULL
1793     || modifier_index < 0
1794     || modifier_index >= numDefLineModifiers)
1795   {
1796     return FALSE;
1797   }
1798   if (DefLineModifiers[modifier_index].isOrgMod)
1799   {
1800     if (biop->org == NULL || (onp = biop->org->orgname) == NULL)
1801     {
1802       return FALSE;
1803     }
1804     mod = onp->mod;
1805     while (mod != NULL
1806         && mod->subtype != DefLineModifiers[modifier_index].subtype)
1807     {
1808       mod = mod->next;
1809     }
1810     if (mod != NULL && mod->subname != NULL)
1811     {
1812       return TRUE;
1813     }
1814   } else {
1815     ssp = biop->subtype;
1816     while (ssp != NULL && ssp->subtype != DefLineModifiers[modifier_index].subtype)
1817     {
1818       ssp = ssp->next;
1819     }
1820     if (ssp != NULL && ssp->name != NULL)
1821     {
1822       return TRUE;
1823     }
1824   }
1825   return FALSE;
1826 }
1827 
1828 /* This function adds in required modifiers for HIV sequences */
AddHIVModifierIndices(ValNodePtr PNTR modifier_indices,BioSourcePtr biop,ModifierItemLocalPtr modList,CharPtr taxName,Int4 clone_isolate_HIV_rule_num)1829 static void AddHIVModifierIndices (
1830   ValNodePtr PNTR modifier_indices,
1831   BioSourcePtr biop,
1832   ModifierItemLocalPtr modList,
1833   CharPtr taxName,
1834   Int4    clone_isolate_HIV_rule_num
1835 )
1836 {
1837   ValNodePtr  vnp;
1838   Boolean have_country_in_list;
1839   Boolean have_isolate_in_list;
1840   Boolean have_clone_in_list;
1841   Boolean have_country_mod;
1842   Boolean have_isolate_mod;
1843   Boolean have_clone_mod;
1844 
1845   /* special handling for HIV */
1846   if (StringNICmp (taxName, "HIV-1", 5) != 0
1847     && StringNICmp (taxName, "HIV-2", 5) != 0)
1848   {
1849     return;
1850   }
1851 
1852   have_country_in_list = FALSE;
1853   have_isolate_in_list = FALSE;
1854   have_clone_in_list = FALSE;
1855   have_country_mod = RecordHasModifier (biop, DEFLINE_POS_Country);
1856   have_isolate_mod = RecordHasModifier (biop, DEFLINE_POS_Isolate);
1857   have_clone_mod = RecordHasModifier (biop, DEFLINE_POS_Clone);
1858 
1859   if (modifier_indices != NULL)
1860   {
1861     for (vnp = *modifier_indices;
1862          vnp != NULL
1863            && (! have_country_in_list
1864              || ! have_isolate_in_list
1865              || ! have_clone_in_list);
1866          vnp = vnp->next)
1867     {
1868       if (vnp->data.intvalue == DEFLINE_POS_Country)
1869       {
1870         have_country_in_list = TRUE;
1871       }
1872       else if (vnp->data.intvalue == DEFLINE_POS_Isolate)
1873       {
1874         have_isolate_in_list = TRUE;
1875       }
1876       else if (vnp->data.intvalue == DEFLINE_POS_Clone)
1877       {
1878         have_clone_in_list = TRUE;
1879       }
1880     }
1881   }
1882 
1883   if ( ! have_country_in_list && have_country_mod && modifier_indices != NULL)
1884   {
1885     vnp = ValNodeNew (*modifier_indices);
1886     vnp->data.intvalue = DEFLINE_POS_Country;
1887     if (*modifier_indices == NULL) *modifier_indices = vnp;
1888   }
1889 
1890   if ((have_clone_in_list && have_clone_mod)
1891       || (have_isolate_in_list && have_isolate_mod))
1892   {
1893     /* don't need HIV rule */
1894   }
1895   else
1896   {
1897     if ( ! have_isolate_in_list
1898         && have_isolate_mod
1899         && ( clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_prefer_isolate
1900           || clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_want_both
1901           || ! have_clone_mod) && modifier_indices != NULL)
1902     {
1903       vnp = ValNodeNew (*modifier_indices);
1904       vnp->data.intvalue = DEFLINE_POS_Isolate;
1905       if (*modifier_indices == NULL) *modifier_indices = vnp;
1906     }
1907 
1908     if ( ! have_clone_in_list
1909         && have_clone_mod
1910         && ( clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_prefer_clone
1911           || clone_isolate_HIV_rule_num == clone_isolate_HIV_rule_want_both
1912           || ! have_isolate_mod) && modifier_indices != NULL)
1913     {
1914       vnp = ValNodeNew (*modifier_indices);
1915       vnp->data.intvalue = DEFLINE_POS_Clone;
1916       if (*modifier_indices == NULL) *modifier_indices = vnp;
1917     }
1918   }
1919 }
1920 
1921 /* This function looks for an OrgMod note that contains the phrase
1922  * "type strain of".  This function is used to determine whether
1923  * strain is a required modifier for the defline for this source.
1924  */
HasTypeStrainComment(BioSourcePtr biop)1925 static Boolean HasTypeStrainComment (BioSourcePtr biop)
1926 {
1927   OrgModPtr mod;
1928 
1929   if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL)
1930   {
1931     return FALSE;
1932   }
1933 
1934   mod = biop->org->orgname->mod;
1935   while (mod != NULL && mod->subtype != ORGMOD_strain)
1936   {
1937     mod = mod->next;
1938   }
1939 
1940   if (mod == NULL)
1941   {
1942     return FALSE;
1943   }
1944 
1945   if (!UseOrgModifier (mod, biop->org->taxname, FALSE))
1946   {
1947     return FALSE;
1948   }
1949 
1950   mod = biop->org->orgname->mod;
1951   while (mod != NULL)
1952   {
1953     if (mod->subtype == 255
1954         && StringISearch (mod->subname, "type strain of") != NULL)
1955     {
1956       return TRUE;
1957     }
1958     mod = mod->next;
1959   }
1960   return FALSE;
1961 }
1962 
1963 
1964 /* This function checks to see if there is a type strain comment on
1965  * the bio source.  If there is one, it checks to see whether strain
1966  * is already in the list of modifiers for the definition line.
1967  * If strain is not already in the list, it is added.
1968  */
1969 static void
AddTypeStrainModifierIndices(ValNodePtr PNTR modifier_indices,BioSourcePtr biop)1970 AddTypeStrainModifierIndices
1971 (ValNodePtr PNTR modifier_indices,
1972  BioSourcePtr    biop)
1973 {
1974   ValNodePtr vnp;
1975 
1976   if (modifier_indices == NULL || biop == NULL || ! HasTypeStrainComment (biop))
1977   {
1978     return;
1979   }
1980 
1981   for (vnp = *modifier_indices;
1982        vnp != NULL && vnp->data.intvalue != DEFLINE_POS_Strain;
1983        vnp = vnp->next)
1984   {
1985   }
1986 
1987   if (vnp == NULL)
1988   {
1989     ValNodeAddInt (modifier_indices, 0, DEFLINE_POS_Strain);
1990   }
1991 }
1992 
1993 static Boolean SpecialHandlingForSpecialTechniques (
1994   BioseqPtr bsp
1995 );
1996 
1997 /* This function checks to see if the Bioseq has a WGS technique.
1998  * If so, and if the strain text is not present in the taxname,
1999  * and strain is not already in the list of modifiers for the
2000  * definition line, add strain.
2001  */
2002 static void
AddWGSModifierIndices(ValNodePtr PNTR modifier_indices,BioSourcePtr biop,BioseqPtr bsp)2003 AddWGSModifierIndices
2004 (ValNodePtr PNTR modifier_indices,
2005  BioSourcePtr    biop,
2006  BioseqPtr       bsp)
2007 {
2008   ValNodePtr vnp;
2009   OrgModPtr  omp;
2010 
2011   if (modifier_indices == NULL || biop == NULL
2012       || biop->org == NULL
2013       || biop->org->orgname == NULL
2014       || biop->org->orgname->mod == NULL
2015       || ! SpecialHandlingForSpecialTechniques (bsp))
2016   {
2017     return;
2018   }
2019   //LCOV_EXCL_START
2020   //When creating definition lines, always remove existing ones, so
2021   //SpecialHandlingForSpecialTechniques will never return true
2022 
2023   for (vnp = *modifier_indices;
2024        vnp != NULL && vnp->data.intvalue != DEFLINE_POS_Strain;
2025        vnp = vnp->next)
2026   {
2027   }
2028 
2029   if (vnp == NULL)
2030   {
2031     omp = biop->org->orgname->mod;
2032     while (omp != NULL && omp->subtype != ORGMOD_strain)
2033     {
2034       omp = omp->next;
2035     }
2036     if (omp != NULL)
2037     {
2038       if (StringStr (biop->org->taxname, omp->subname) != NULL)
2039       {
2040         /* don't add, present already */
2041       } else {
2042         /* add strain modifier */
2043         ValNodeAddInt (modifier_indices, 0, DEFLINE_POS_Strain);
2044       }
2045     }
2046   }
2047   //LCOV_EXCL_STOP
2048 }
2049 
2050 /* This function provides a label to be used in the definition line for
2051  * each modifier that requires one.  Most modifiers use a label that is
2052  * similar to the name of the modifier displayed in the definition line
2053  * options dialog.
2054  */
AddModifierLabel(Boolean use_labels,Boolean is_orgmod,Uint1 subtype,CharPtr modifier_text)2055 NLM_EXTERN void AddModifierLabel (
2056   Boolean use_labels,
2057   Boolean is_orgmod,
2058   Uint1   subtype,
2059   CharPtr modifier_text
2060 )
2061 {
2062   CharPtr cp;
2063   if (!is_orgmod && subtype == SUBSRC_endogenous_virus_name)
2064   {
2065     StringCpy (modifier_text, "endogenous virus");
2066   }
2067   else if (is_orgmod && subtype == ORGMOD_specimen_voucher)
2068   {
2069     if (use_labels)
2070     {
2071       StringCpy (modifier_text, "voucher");
2072     }
2073     else
2074     {
2075       modifier_text [0] = 0;
2076     }
2077   }
2078   else if (use_labels
2079            || (!is_orgmod
2080                && (subtype == SUBSRC_transgenic
2081                    || subtype == SUBSRC_plasmid_name)))
2082   {
2083     if (is_orgmod)
2084     {
2085       StringCpy (modifier_text, GetOrgModQualName (subtype));
2086     } else {
2087       StringCpy (modifier_text, GetSubsourceQualName (subtype));
2088     }
2089     modifier_text[0] = tolower(modifier_text[0]);
2090     cp = StringStr (modifier_text, "-name");
2091     if (cp != NULL) *cp = 0;
2092   }
2093   else
2094   {
2095     modifier_text[0] = 0;
2096   }
2097 }
2098 
2099 typedef struct orgmodabbrevdata {
2100   Int2    subtype;
2101   CharPtr abbrev;
2102 } OrgModAbbrevData, PNTR OrgModAbbrevPtr;
2103 
2104 static OrgModAbbrevData orgmod_abbrevs[] = {
2105   { ORGMOD_variety, "var." },
2106   { ORGMOD_forma, "f." },
2107   { ORGMOD_forma_specialis, "f. sp." },
2108   { ORGMOD_pathovar, "pv." }
2109 };
2110 
2111 #define NUM_orgmod_abbrevs sizeof (orgmod_abbrevs) / sizeof (OrgModAbbrevData)
2112 
2113 
2114 
/* Looks for search_text as a separate word inside taxName and decides
 * whether that occurrence stands for a modifier of the given subtype.
 *
 * An occurrence qualifies only when it is not at the very start of
 * taxName, is preceded by a space or '(', and is followed by a space or
 * the end of the string.  For a qualifying occurrence:
 *   - if allow_at_end is set and the occurrence is the tail of taxName,
 *     the function returns FALSE immediately;
 *   - if one of the orgmod_abbrevs abbreviations sits directly in front
 *     of it (abbreviation, one character, then the value), the result is
 *     TRUE when that abbreviation belongs to subtype; otherwise the
 *     occurrence is skipped;
 *   - with no abbreviation in front, the result is TRUE for strain,
 *     sub-species, specimen-voucher, isolate and cultivar.
 * Returns FALSE when no occurrence is accepted, and also when either
 * string is NULL or search_text is empty.
 */
static Boolean FindModifierTextInTaxname (CharPtr search_text, Int2 subtype, CharPtr taxName, Boolean allow_at_end)
{
  CharPtr value_found;
  Int4    value_len, abbrev_len, offset, i;
  Char    before, after;
  Boolean other_abbrev_found;

  /* An empty search string "matches" at every position, which would let
   * the scan below step past the terminator of taxName.
   */
  if (taxName == NULL || search_text == NULL || search_text[0] == 0)
  {
    return FALSE;
  }

  value_len = (Int4) StringLen (search_text);
  for (value_found = StringStr (taxName, search_text);
       value_found != NULL;
       value_found = StringStr (value_found + 1, search_text))
  {
    /* a match at the very start of the name is never a modifier */
    if (value_found == taxName)
    {
      continue;
    }
    before = *(value_found - 1);
    if (before != ' ' && before != '(')
    {
      continue;
    }
    /* NOTE(review): a further test for a preceding ')' used to follow
     * here.  It could never be true after the check above and has been
     * dropped; it may have been meant to handle a value enclosed in
     * parentheses -- confirm the intent.
     */
    after = *(value_found + value_len);
    if (after != ' ' && after != 0)
    {
      continue;
    }
    if (allow_at_end && StringCmp (value_found, search_text) == 0) {
      return FALSE;
    }

    /* Work with offsets so no pointer before the start of taxName is
     * ever formed.
     */
    offset = (Int4) (value_found - taxName);
    other_abbrev_found = FALSE;
    for (i = 0; i < (Int4) (NUM_orgmod_abbrevs); i++)
    {
      abbrev_len = (Int4) StringLen (orgmod_abbrevs[i].abbrev);
      if (offset > abbrev_len + 1
        && StringNCmp (value_found - abbrev_len - 1,
                        orgmod_abbrevs[i].abbrev,
                        abbrev_len) == 0)
      {
        if (subtype == orgmod_abbrevs[i].subtype)
        {
          return TRUE;
        }
        else
        {
          other_abbrev_found = TRUE;
        }
      }
    }
    if ( ! other_abbrev_found
      && ( subtype == ORGMOD_strain
        || subtype == ORGMOD_sub_species
        || subtype == ORGMOD_specimen_voucher
        || subtype == ORGMOD_isolate
        || subtype == ORGMOD_cultivar))
    {
      return TRUE;
    }
  }
  return FALSE;
}
2180 
2181 
2182 /* The UseOrgModifier function looks for the values of certain kinds of
2183  * modifiers in the taxonomy name, so that they will not be added to the
2184  * definition line as modifiers if they are already present in the
2185  * taxonomy name.
2186  */
UseOrgModifier(OrgModPtr mod,CharPtr taxName,Boolean allow_at_end)2187 NLM_EXTERN Boolean UseOrgModifier (
2188   OrgModPtr mod,
2189   CharPtr   taxName,
2190   Boolean   allow_at_end
2191 )
2192 {
2193   Boolean value_found = FALSE;
2194   CharPtr search_text;
2195   CharPtr cp;
2196 
2197   if (mod == NULL || mod->subname == NULL) return FALSE;
2198 
2199   /* If selected modifiers already appear in the tax Name, */
2200   /* don't use them in the organism description again */
2201   if (mod->subtype == ORGMOD_strain
2202     || mod->subtype == ORGMOD_variety
2203     || mod->subtype == ORGMOD_sub_species
2204     || mod->subtype == ORGMOD_forma
2205     || mod->subtype == ORGMOD_forma_specialis
2206     || mod->subtype == ORGMOD_pathovar
2207     || mod->subtype == ORGMOD_specimen_voucher
2208     || mod->subtype == ORGMOD_isolate
2209     || mod->subtype == ORGMOD_cultivar)
2210   {
2211     if (FindModifierTextInTaxname (mod->subname, mod->subtype, taxName, allow_at_end)) {
2212       value_found = TRUE;
2213     } else if (mod->subtype == ORGMOD_specimen_voucher && (cp = StringChr (mod->subname, ':')) != NULL) {
2214       search_text = StringSave (mod->subname);
2215       search_text[cp - mod->subname] = ' ';
2216       value_found = FindModifierTextInTaxname (search_text, mod->subtype, taxName, allow_at_end);
2217       search_text = MemFree (search_text);
2218     }
2219   }
2220   return !value_found;
2221 }
2222 
2223 
2224 /* The UseSubSrcModifier function looks for the values of certain kinds of
2225  * modifiers in the taxonomy name, so that they will not be added to the
2226  * definition line as modifiers if they are already present in the
2227  * taxonomy name.
2228  */
UseSubSrcModifier(SubSourcePtr ssp,CharPtr taxName,Boolean allow_at_end)2229 NLM_EXTERN Boolean UseSubSrcModifier (
2230   SubSourcePtr ssp,
2231   CharPtr   taxName,
2232   Boolean   allow_at_end
2233 )
2234 {
2235   CharPtr clone_text = "enrichment culture clone ";
2236   CharPtr cp;
2237   Boolean rval = TRUE;
2238 
2239   if (ssp == NULL) {
2240     return FALSE;
2241   }
2242 
2243   if (ssp->subtype == SUBSRC_clone)
2244   {
2245     cp = StringISearch (taxName, clone_text);
2246     if (cp != NULL && StringNICmp (cp + StringLen (clone_text), ssp->name, StringLen (ssp->name)) == 0)
2247     {
2248       rval = FALSE;
2249     }
2250   }
2251 
2252   return rval;
2253 }
2254 
2255 //LCOV_EXCL_START
2256 //Not part of Autodef or Cleanup
2257 /* The SetRequiredModifiers function copies the default required values from
2258  * the global DefLineModifiers array into the local list of modifier
2259  * information.
2260  */
SetRequiredModifiers(ModifierItemLocalPtr modList)2261 NLM_EXTERN void SetRequiredModifiers (
2262   ModifierItemLocalPtr modList
2263 )
2264 {
2265   Int4  item_index;
2266 
2267   for (item_index = 0; item_index < numDefLineModifiers; item_index++)
2268   {
2269     modList[item_index].required = IsDeflineModifierRequiredByDefault(DefLineModifiers[item_index].isOrgMod,
2270                                                                       DefLineModifiers[item_index].subtype);
2271   }
2272 
2273 }
2274 
2275 
2276 static const Int4 s_auto_def_id_preferred_quals[] = {
2277   DEFLINE_POS_Strain,
2278   DEFLINE_POS_Clone,
2279   DEFLINE_POS_Isolate,
2280   DEFLINE_POS_Cultivar,
2281   DEFLINE_POS_Specimen_voucher,
2282 };
2283 
2284 static const Int4 k_num_auto_def_id_preferred_quals = sizeof (s_auto_def_id_preferred_quals) / sizeof (Int4);
2285 
2286 //Not part of Autodef or Cleanup
2287 /* This function generates the modifiers for "AutoDefID" */
SetAutoDefIDModifiers(ModifierItemLocalPtr modList)2288 NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList)
2289 {
2290   Int4 index;
2291   Boolean added_required = FALSE;
2292 
2293   if (modList == NULL) return;
2294   /* first look for first modifier in list that is present on all sources */
2295   for (index = 0; index < k_num_auto_def_id_preferred_quals && !added_required; index++) {
2296     if (modList[s_auto_def_id_preferred_quals[index]].all_present) {
2297       modList[s_auto_def_id_preferred_quals[index]].required = TRUE;
2298       added_required = TRUE;
2299     }
2300   }
2301   /* if not found, then look for first modifier in list that is present on any sources */
2302   for (index = 0; index < k_num_auto_def_id_preferred_quals && !added_required; index++) {
2303     if (modList[s_auto_def_id_preferred_quals[index]].any_present) {
2304       modList[s_auto_def_id_preferred_quals[index]].required = TRUE;
2305       added_required = TRUE;
2306     }
2307   }
2308 }
2309 //LCOV_EXCL_STOP
2310 
2311 
2312 /* This function fixes HIV abbreviations, removes items in parentheses,
2313  * and trims spaces around the taxonomy name.
2314  */
CleanUpTaxName(CharPtr taxName,Boolean keep_in_paren)2315 NLM_EXTERN void CleanUpTaxName (
2316   CharPtr taxName,
2317   Boolean keep_in_paren
2318 )
2319 {
2320   CharPtr ptr;
2321 
2322   if (StringICmp (taxName, "Human immunodeficiency virus type 1") == 0
2323     || StringICmp (taxName, "Human immunodeficiency virus 1") == 0)
2324   {
2325     StringCpy (taxName, "HIV-1");
2326   }
2327   else if (StringICmp (taxName, "Human immunodeficiency virus type 2") == 0
2328     || StringICmp (taxName, "Human immunodeficiency virus 2") == 0)
2329   {
2330     StringCpy (taxName, "HIV-2");
2331   }
2332   else
2333   {
2334     if (! keep_in_paren)
2335     {
2336       ptr = StringStr (taxName, "(");
2337       if (ptr != NULL)
2338         *ptr = '\0';
2339     }
2340     TrimSpacesAroundString (taxName);
2341   }
2342 }
2343 
2344 /* This function gets the BioSource descriptor for the BioSeq. */
GetBiopForBsp(BioseqPtr bsp)2345 NLM_EXTERN BioSourcePtr GetBiopForBsp (
2346   BioseqPtr bsp
2347 )
2348 {
2349   SeqMgrDescContext  dcontext;
2350   SeqDescrPtr    sdp;
2351   BioSourcePtr    biop;
2352 
2353   if (bsp == NULL) return NULL;
2354   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
2355   if (sdp != NULL) {
2356     biop = (BioSourcePtr) sdp->data.ptrvalue;
2357     return biop;
2358   }
2359 
2360   return NULL;
2361 }
2362 
2363 
IsSpName(CharPtr taxName)2364 NLM_EXTERN Boolean IsSpName (CharPtr taxName)
2365 {
2366   CharPtr cp;
2367 
2368   cp = StringStr (taxName, " sp.");
2369   /* check to make sure not "f. sp." */
2370   if (cp != NULL && cp[4] == ' '
2371       && (cp - taxName < 2 || *(cp - 2) != 'f' || *(cp - 1) != '.'))
2372   {
2373     return TRUE;
2374   }
2375   else
2376   {
2377     return FALSE;
2378   }
2379 }
2380 
2381 
/* Returns a newly allocated copy of a ValNode list, duplicating the
 * choice and integer value of every node in order.  The caller owns the
 * returned list.  If a node cannot be allocated, copying stops and the
 * nodes copied so far are returned.
 */
static ValNodePtr ValNodeIntCopy (ValNodePtr orig)
{
  ValNodePtr cpy = NULL, last = NULL, vnp;

  while (orig != NULL) {
    vnp = ValNodeNew (NULL);
    if (vnp == NULL) {
      /* out of memory: hand back the partial copy rather than crash */
      break;
    }
    vnp->choice = orig->choice;
    vnp->data.intvalue = orig->data.intvalue;
    if (last == NULL) {
      cpy = vnp;
    } else {
      last->next = vnp;
    }
    last = vnp;
    orig = orig->next;
  }
  return cpy;
}
2400 
2401 
IsTSA(BioseqPtr bsp)2402 NLM_EXTERN Boolean IsTSA (BioseqPtr bsp)
2403 {
2404   SeqDescrPtr sdp;
2405   SeqMgrDescContext context;
2406   MolInfoPtr        mip;
2407   Boolean           rval = FALSE;
2408 
2409   if (bsp == NULL) return FALSE;
2410 
2411   for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
2412        sdp != NULL && !rval;
2413        sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_molinfo, &context)) {
2414     mip = (MolInfoPtr) sdp->data.ptrvalue;
2415     if (mip != NULL && mip->tech == MI_TECH_tsa) {
2416       rval = TRUE;
2417     }
2418   }
2419   return rval;
2420 }
2421 
2422 
2423 //LCOV_EXCL_START
2424 //Not part of Autodef or Cleanup
IsGenomeProjectIDDescriptor(SeqDescrPtr sdp)2425 NLM_EXTERN Boolean IsGenomeProjectIDDescriptor (SeqDescrPtr sdp)
2426 {
2427   UserObjectPtr        uop;
2428   ObjectIdPtr          oip;
2429 
2430   if (sdp == NULL || sdp->choice != Seq_descr_user) return FALSE;
2431   uop = (UserObjectPtr) sdp->data.ptrvalue;
2432   if (uop != NULL) {
2433     oip = uop->type;
2434     if (oip != NULL && StringCmp (oip->str, "GenomeProjectsDB") == 0) {
2435       return TRUE;
2436     }
2437   }
2438   return FALSE;
2439 }
2440 //LCOV_EXCL_STOP
2441 
2442 
2443 //LCOV_EXCL_START
2444 //Not used for Autodef and Cleanup
GetGenomeProjectIDDescriptor(BioseqPtr bsp)2445 NLM_EXTERN SeqDescrPtr GetGenomeProjectIDDescriptor (BioseqPtr bsp)
2446 {
2447   SeqDescrPtr sdp;
2448 
2449   if (bsp == NULL) return NULL;
2450   sdp = bsp->descr;
2451   while (sdp != NULL) {
2452     if (IsGenomeProjectIDDescriptor(sdp)) {
2453       return sdp;
2454     }
2455     sdp = sdp->next;
2456   }
2457   return NULL;
2458 }
2459 
2460 
2461 //Not used for Autodef and Cleanup
GetGenomeProjectID(BioseqPtr bsp)2462 NLM_EXTERN Int4 GetGenomeProjectID (BioseqPtr bsp)
2463 {
2464   SeqMgrDescContext context;
2465   SeqDescrPtr       sdp;
2466   UserObjectPtr     uop;
2467   UserFieldPtr      ufp;
2468   Int4              gpid = 0;
2469 
2470   if (bsp == NULL) return 0;
2471 
2472   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
2473   while (sdp != NULL && gpid == 0) {
2474     uop = (UserObjectPtr) sdp->data.ptrvalue;
2475     if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0)
2476     {
2477       ufp = uop->data;
2478       while (ufp != NULL && gpid == 0) {
2479         if (ufp->label != NULL
2480             && StringCmp (ufp->label->str, "ProjectID") == 0
2481             && ufp->choice == 2) {
2482           gpid = ufp->data.intvalue;
2483         }
2484         ufp = ufp->next;
2485       }
2486     }
2487     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
2488   }
2489 
2490   return gpid;
2491 }
2492 
2493 
2494 //Not part of Autodef or Cleanup
/* Descriptor callback: when sdp is a source descriptor whose taxname
 * IsSpName accepts, appends that taxname pointer to the ValNode list
 * that userdata points to.  The string is not copied, so the list must
 * not free it.
 */
static void AddSpTaxnameToList (SeqDescrPtr sdp, Pointer userdata)
{
  BioSourcePtr biop;

  if (sdp == NULL || userdata == NULL) return;
  if (sdp->choice != Seq_descr_source) return;

  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop == NULL || biop->org == NULL) return;

  if (IsSpName (biop->org->taxname)) {
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, biop->org->taxname);
  }
}
2506 
2507 //Not part of Autodef or Cleanup
ShouldExcludeSp(SeqEntryPtr sep)2508 NLM_EXTERN Boolean ShouldExcludeSp (SeqEntryPtr sep)
2509 {
2510   ValNodePtr name_list = NULL, vnp1, vnp2;
2511   Boolean    all_diff = TRUE;
2512 
2513   if (sep == NULL) return TRUE;
2514   VisitDescriptorsInSep (sep, &name_list, AddSpTaxnameToList);
2515 
2516   name_list = ValNodeSort (name_list, SortVnpByString);
2517 
2518   if (name_list != NULL && name_list->next != NULL)
2519   {
2520     for (vnp1 = name_list; vnp1 != NULL && vnp1->next != NULL && all_diff; vnp1 = vnp1->next)
2521     {
2522       for (vnp2 = vnp1->next; vnp2 != NULL && all_diff; vnp2 = vnp2->next)
2523       {
2524         if (StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue) == 0)
2525         {
2526           all_diff = FALSE;
2527         }
2528       }
2529     }
2530   }
2531   name_list = ValNodeFree (name_list);
2532   return all_diff;
2533 }
2534 //LCOV_EXCL_STOP
2535 
2536 /* This function sets the default values for the organism description settings */
InitOrganismDescriptionModifiers(OrganismDescriptionModifiersPtr odmp,SeqEntryPtr sep)2537 NLM_EXTERN void InitOrganismDescriptionModifiers(OrganismDescriptionModifiersPtr odmp, SeqEntryPtr sep)
2538 {
2539   if (odmp == NULL) {
2540     return;
2541   }
2542   MemSet (odmp, 0, sizeof (OrganismDescriptionModifiers));
2543   odmp->use_labels = TRUE;
2544   odmp->max_mods = -99;
2545   odmp->keep_paren = TRUE;
2546   odmp->exclude_sp = ShouldExcludeSp (sep);
2547   odmp->exclude_cf = FALSE;
2548   odmp->exclude_aff = FALSE;
2549   odmp->exclude_nr = FALSE;
2550   odmp->include_country_extra = FALSE;
2551   odmp->clone_isolate_HIV_rule_num = clone_isolate_HIV_rule_want_both;
2552   odmp->use_modifiers = FALSE;
2553   odmp->allow_semicolon_in_modifier = FALSE;
2554   odmp->allow_mod_at_end_of_taxname = FALSE;
2555 
2556 }
2557 
2558 
AddPlasmid(SubSourcePtr ssp_list,Boolean use_labels,Boolean allow_semicolon_in_modifier)2559 static ValNodePtr AddPlasmid (SubSourcePtr ssp_list, Boolean use_labels, Boolean allow_semicolon_in_modifier)
2560 {
2561   Char         modifier_text [256];
2562   ValNodePtr   strings = NULL;
2563   Uint4        no_semicolon_len, label_len;
2564   Boolean      just_plasmid = FALSE, found_name = FALSE;
2565 
2566   while (ssp_list != NULL)
2567   {
2568     if (ssp_list->name != NULL && ssp_list->name[0] != 0
2569         && ssp_list->subtype == SUBSRC_plasmid_name)
2570     {
2571       if (StringCmp (ssp_list->name, "unnamed") == 0)
2572       {
2573         just_plasmid = TRUE;
2574       }
2575       else
2576       {
2577         found_name = TRUE;
2578         AddModifierLabel (use_labels, FALSE, ssp_list->subtype, modifier_text);
2579         if (modifier_text[0] != 0)
2580         {
2581           StringCat (modifier_text, " ");
2582         }
2583         label_len = StringLen (modifier_text);
2584         if (allow_semicolon_in_modifier)
2585         {
2586           no_semicolon_len = StringLen (ssp_list->name);
2587         }
2588         else
2589         {
2590           no_semicolon_len = StringCSpn (ssp_list->name, ";");
2591         }
2592         if (no_semicolon_len > sizeof (modifier_text) - 1 - label_len)
2593         {
2594           no_semicolon_len = sizeof (modifier_text) - 1 - label_len;
2595         }
2596         StringNCat (modifier_text, ssp_list->name, no_semicolon_len);
2597         modifier_text [ no_semicolon_len + label_len ] = 0;
2598         ValNodeCopyStr( &strings, 0, modifier_text);
2599       }
2600     }
2601     ssp_list = ssp_list->next;
2602   }
2603   if (just_plasmid && !found_name)
2604   {
2605     ValNodeCopyStr( &strings, 0, "plasmid");
2606   }
2607   return strings;
2608 }
2609 
2610 
2611 /* This function generates a string describing the organism based on the
2612  * modifiers selected and other organism description options.
2613  */
GetOrganismDescription(BioseqPtr bsp,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,OrganismDescriptionModifiersPtr odmp)2614 static CharPtr GetOrganismDescription (
2615   BioseqPtr bsp,
2616   ModifierItemLocalPtr modList,
2617   ValNodePtr   modifier_indices,
2618   OrganismDescriptionModifiersPtr odmp
2619 )
2620 {
2621   Char         taxName [196];
2622   Char         modifier_text [256];
2623   ValNodePtr   strings = NULL;
2624   BioSourcePtr biop;
2625   OrgModPtr    mod;
2626   SubSourcePtr ssp;
2627   ValNodePtr   vnp;
2628   Int2         feature_index;
2629   CharPtr      org_desc;
2630   CharPtr      cp;
2631   Uint4        no_semicolon_len, label_len;
2632   CharPtr      tmp;
2633   Char         id[255];
2634   SeqIdPtr     sip;
2635   DbtagPtr     dbtag;
2636   CharPtr      db;
2637 
2638   taxName [0] = '\0';
2639 
2640   biop = GetBiopForBsp (bsp);
2641   if (biop == NULL) return NULL;
2642   if (biop->org == NULL) return NULL;
2643   if (biop->org->taxname == NULL) return NULL;
2644   StringNCpy (taxName, biop->org->taxname, sizeof (taxName) - 1);
2645   taxName [ sizeof (taxName) - 1] = 0;
2646 
2647   CleanUpTaxName (taxName, odmp->keep_paren);
2648 
2649   if (biop->origin == ORG_MUT)
2650   {
2651     ValNodeAddStr (&strings, 0, StringSave ("Mutant"));
2652   }
2653 
2654   ValNodeAddStr (&strings, 0, StringSave (taxName));
2655 
2656   if (odmp->exclude_sp && IsSpName(taxName))
2657   {
2658     ValNodeLink (&strings, AddPlasmid (biop->subtype, odmp->use_labels, odmp->allow_semicolon_in_modifier));
2659     org_desc = MergeValNodeStrings (strings, FALSE);
2660     ValNodeFreeData (strings);
2661     return org_desc;
2662   }
2663 
2664   if (odmp->exclude_cf)
2665   {
2666     cp = StringStr (taxName, " cf.");
2667     if (cp != NULL)
2668     {
2669       org_desc = MergeValNodeStrings (strings, FALSE);
2670       ValNodeFreeData (strings);
2671       return org_desc;
2672     }
2673   }
2674 
2675   if (odmp->exclude_aff)
2676   {
2677     cp = StringStr (taxName, " aff.");
2678     if (cp != NULL)
2679     {
2680       org_desc = MergeValNodeStrings (strings, FALSE);
2681       ValNodeFreeData (strings);
2682       return org_desc;
2683     }
2684   }
2685   if (odmp->exclude_nr)
2686   {
2687     cp = StringStr (taxName, " nr.");
2688     if (cp != NULL)
2689     {
2690       org_desc = MergeValNodeStrings (strings, FALSE);
2691       ValNodeFreeData (strings);
2692       return org_desc;
2693     }
2694   }
2695 
2696 
2697   if (HasTypeStrainComment (biop))
2698   {
2699 
2700   }
2701 
2702   /* copy modifier indices list */
2703   modifier_indices = ValNodeIntCopy (modifier_indices);
2704   AddHIVModifierIndices (&modifier_indices, biop, modList, taxName,
2705                          odmp->clone_isolate_HIV_rule_num);
2706   AddTypeStrainModifierIndices (&modifier_indices, biop);
2707   AddWGSModifierIndices (&modifier_indices, biop, bsp);
2708 
2709   modifier_indices = ValNodeSort (modifier_indices, SortByImportance);
2710   for (vnp = modifier_indices;
2711        vnp != NULL && (odmp->max_mods == -99 || odmp->max_mods > 0);
2712        vnp = vnp->next)
2713   {
2714     feature_index = vnp->data.intvalue;
2715     if (! odmp->use_modifiers && !IsDeflineModifierRequiredByDefault(DefLineModifiers[feature_index].isOrgMod,
2716                                                                      DefLineModifiers[feature_index].subtype))
2717     {
2718       /* do nothing */
2719     }
2720     else if (DefLineModifiers[feature_index].isOrgMod)
2721     {
2722       if (biop->org == NULL || biop->org->orgname == NULL) continue;
2723       mod = biop->org->orgname->mod;
2724       while (mod != NULL
2725         && mod->subtype != DefLineModifiers[feature_index].subtype)
2726       {
2727         mod = mod->next;
2728       }
2729       if (mod != NULL && UseOrgModifier (mod, taxName, odmp->allow_mod_at_end_of_taxname))
2730       {
2731         if (odmp->allow_semicolon_in_modifier) {
2732           no_semicolon_len = StringLen (mod->subname);
2733         } else {
2734           no_semicolon_len = StringCSpn (mod->subname, ";");
2735         }
2736 
2737         if (mod->subtype == ORGMOD_nat_host)
2738         {
2739           sprintf (modifier_text, "from ");
2740           if (no_semicolon_len > sizeof (modifier_text) - 6)
2741           {
2742             no_semicolon_len = sizeof (modifier_text) - 6;
2743           }
2744           StringNCpy (modifier_text + 5, mod->subname,
2745                       no_semicolon_len);
2746           modifier_text[no_semicolon_len + 5] = 0;
2747         }
2748         else
2749         {
2750           AddModifierLabel (odmp->use_labels, TRUE, mod->subtype, modifier_text);
2751           if (modifier_text[0] != 0)
2752             StringCat (modifier_text, " ");
2753           label_len = StringLen (modifier_text);
2754           if (no_semicolon_len > (Int4) sizeof (modifier_text) - label_len - 1)
2755           {
2756             no_semicolon_len = (Int4) sizeof (modifier_text) - label_len - 1;
2757           }
2758           if (mod->subtype == ORGMOD_specimen_voucher && StringNICmp (mod->subname, "personal:", 9) == 0)
2759           {
2760             tmp = mod->subname + 9;
2761             while (isspace (*tmp))
2762             {
2763               tmp++;
2764             }
2765             if (odmp->allow_semicolon_in_modifier) {
2766               no_semicolon_len = StringLen (tmp);
2767             } else {
2768               no_semicolon_len = StringCSpn (tmp, ";");
2769             }
2770           }
2771           else
2772           {
2773             tmp = mod->subname;
2774           }
2775 
2776           StringNCat (modifier_text, tmp,
2777                       no_semicolon_len);
2778           modifier_text [ no_semicolon_len + label_len] = 0;
2779         }
2780         ValNodeCopyStr( &strings, 0, modifier_text);
2781         if (odmp->max_mods != -99)
2782           odmp->max_mods --;
2783       }
2784     } else {
2785       ssp = biop->subtype;
2786       while (ssp != NULL
2787           && ssp->subtype != DefLineModifiers[feature_index].subtype)
2788       {
2789         ssp = ssp->next;
2790       }
2791       if (ssp != NULL && UseSubSrcModifier (ssp, taxName, odmp->allow_mod_at_end_of_taxname))
2792       {
2793         if (odmp->include_country_extra || odmp->allow_semicolon_in_modifier)
2794         {
2795           no_semicolon_len = StringLen (ssp->name);
2796         }
2797         else
2798         {
2799           no_semicolon_len = StringCSpn (ssp->name, ";");
2800         }
2801         AddModifierLabel (odmp->use_labels, FALSE, ssp->subtype, modifier_text);
2802         if (ssp->subtype == SUBSRC_transgenic)
2803         {
2804           /* do nothing, transgenic already captured from label */
2805         }
2806         else if (ssp->subtype == SUBSRC_country)
2807         {
2808           sprintf (modifier_text, "from ");
2809           if (no_semicolon_len > sizeof (modifier_text) - 6)
2810           {
2811             no_semicolon_len = sizeof (modifier_text) - 6;
2812           }
2813           StringNCpy (modifier_text + 5, ssp->name, no_semicolon_len);
2814           modifier_text[5 + no_semicolon_len] = 0;
2815           if (!odmp->include_country_extra)
2816           {
2817             cp = StringChr (modifier_text, ':');
2818             if (cp != NULL) *cp = 0;
2819           }
2820         }
2821         else if (ssp->name != NULL && ssp->name[0] != 0
2822           && (ssp->subtype != SUBSRC_plasmid_name
2823             || StringCmp (ssp->name, "unnamed") != 0))
2824         {
2825           if (modifier_text[0] != 0)
2826             StringCat (modifier_text, " ");
2827           label_len = StringLen (modifier_text);
2828           if (no_semicolon_len > sizeof (modifier_text) - 1 - label_len)
2829           {
2830             no_semicolon_len = sizeof (modifier_text) - 1 - label_len;
2831           }
2832           StringNCat (modifier_text, ssp->name, no_semicolon_len);
2833           modifier_text [ no_semicolon_len + label_len ] = 0;
2834         }
2835 
2836         ValNodeCopyStr( &strings, 0, modifier_text);
2837         if (odmp->max_mods != -99)
2838           odmp->max_mods --;
2839       }
2840     }
2841   }
2842 
2843   /* add TSA project ID if necessary */
2844   if (IsTSA (bsp)) {
2845     db = GetTSAIDDB(bsp);
2846     if (db != NULL) {
2847       for (sip = bsp->id; sip != NULL; sip = sip->next) {
2848         if (sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) {
2849           dbtag = (DbtagPtr) sip->data.ptrvalue;
2850           if (StringCmp (dbtag->db, db) == 0 && dbtag->tag != NULL) {
2851             if (dbtag->tag->str != NULL) {
2852               ValNodeAddPointer (&strings, 0, StringSave (dbtag->tag->str));
2853             } else {
2854               sprintf (id, "%d", dbtag->tag->id);
2855               ValNodeAddPointer (&strings, 0, StringSave (id));
2856             }
2857             break;
2858           }
2859         }
2860       }
2861     }
2862   }
2863 
2864   org_desc = MergeValNodeStrings (strings, FALSE);
2865   ValNodeFreeData (strings);
2866   modifier_indices = ValNodeFree (modifier_indices);
2867   return org_desc;
2868 
2869 }
2870 
2871 /* end of organism description section */
2872 
2873 /* This section of code contains functions which are useful for dealing
2874  * with locations of features (SeqLocPtr objects).
2875  */
2876 
2877 /* This function determines whether location A is on the same strand as
2878  * location B
2879  */
AreAAndBOnSameStrand(SeqLocPtr slp1,SeqLocPtr slp2)2880 static Boolean AreAAndBOnSameStrand (
2881   SeqLocPtr slp1,
2882   SeqLocPtr slp2
2883 )
2884 {
2885   Uint1 strand1;
2886   Uint2 strand2;
2887 
2888   strand1 = SeqLocStrand (slp1);
2889   strand2 = SeqLocStrand (slp2);
2890   if (strand1 == Seq_strand_minus && strand2 != Seq_strand_minus)
2891     return FALSE;
2892   else if (strand1 != Seq_strand_minus && strand2 == Seq_strand_minus)
2893     return FALSE;
2894   else
2895     return TRUE;
2896 }
2897 
2898 /* This function determines whether location A is contained in or equal to
2899  * location B and on the same strand as location B.
2900  */
IsLocAInBonSameStrand(SeqLocPtr slp1,SeqLocPtr slp2)2901 NLM_EXTERN Boolean IsLocAInBonSameStrand (
2902   SeqLocPtr slp1,
2903   SeqLocPtr slp2
2904 )
2905 {
2906   if (! AreAAndBOnSameStrand ( slp1, slp2))
2907   {
2908     return FALSE;
2909   }
2910   else if ( SeqLocAinB (slp1, slp2) < 0)
2911   {
2912     return FALSE;
2913   }
2914   else
2915   {
2916     return TRUE;
2917   }
2918 }
2919 
2920 /* This function calculates the intersection between two locations.
2921  */
SeqLocIntersection(SeqLocPtr slp1,SeqLocPtr slp2,BioseqPtr bsp)2922 static SeqLocPtr SeqLocIntersection (
2923   SeqLocPtr slp1,
2924   SeqLocPtr slp2,
2925   BioseqPtr bsp
2926 )
2927 {
2928   SeqLocPtr diff1, diff2, result;
2929 
2930   diff1 = SeqLocMerge ( bsp, slp1, NULL, FALSE, TRUE, FALSE);
2931   diff1 = SeqLocSubtract (diff1, slp2);
2932   diff2 = SeqLocMerge ( bsp, slp2, NULL, FALSE, TRUE, FALSE);
2933   diff2 = SeqLocSubtract (diff2, slp1);
2934   result = SeqLocMerge ( bsp, slp1, slp2, FALSE, TRUE, FALSE);
2935 
2936   if (diff1 != NULL)
2937   {
2938     result = SeqLocSubtract (result, diff1);
2939     SeqLocFree (diff1);
2940     if (result == NULL) return NULL;
2941   }
2942   if (diff2 != NULL)
2943   {
2944     result = SeqLocSubtract (result, diff2);
2945     SeqLocFree (diff2);
2946     if (result == NULL) return NULL;
2947   }
2948   return result;
2949 }
2950 
/* Values accepted for the adjacent_type argument of IsAAdjacentToB;
 * any other value makes that function return FALSE.
 */
#define ADJACENT_TYPE_ANY        0
#define ADJACENT_TYPE_UPSTREAM   1
#define ADJACENT_TYPE_DOWNSTREAM 2
2954 
2955 /* This function determines whether A is "next to" B and upstream or downstream
2956  * from B.  A cannot overlap B.  If allow_interval is TRUE, there can be
2957  * space between A and B.
2958  */
IsAAdjacentToB(SeqLocPtr a,SeqLocPtr b,BioseqPtr bsp,Int2 adjacent_type,Boolean allow_interval)2959 static Boolean IsAAdjacentToB (
2960   SeqLocPtr a,
2961   SeqLocPtr b,
2962   BioseqPtr bsp,
2963   Int2      adjacent_type,
2964   Boolean   allow_interval
2965 )
2966 {
2967   Int4      a_end, b_end;
2968   Uint2     strand;
2969 
2970   if (adjacent_type != ADJACENT_TYPE_ANY
2971     && adjacent_type != ADJACENT_TYPE_UPSTREAM
2972     && adjacent_type != ADJACENT_TYPE_DOWNSTREAM)
2973   {
2974     return FALSE;
2975   }
2976 
2977   if ( ! AreAAndBOnSameStrand (a, b))
2978   {
2979     return FALSE;
2980   }
2981 
2982   strand = SeqLocStrand (a);
2983   if ( adjacent_type == ADJACENT_TYPE_ANY)
2984   {
2985     a_end = GetOffsetInBioseq (a, bsp, SEQLOC_RIGHT_END);
2986     b_end = GetOffsetInBioseq (b, bsp, SEQLOC_LEFT_END);
2987     if ((allow_interval && b_end < a_end)
2988       || b_end == a_end + 1)
2989     {
2990       return TRUE;
2991     }
2992     a_end = GetOffsetInBioseq (a, bsp, SEQLOC_LEFT_END);
2993     b_end = GetOffsetInBioseq (b, bsp, SEQLOC_RIGHT_END);
2994     if ((allow_interval && b_end > a_end)
2995       || a_end == b_end + 1)
2996     {
2997       return TRUE;
2998     }
2999     return FALSE;
3000   }
3001   else if ( (strand == Seq_strand_minus
3002       && adjacent_type == ADJACENT_TYPE_UPSTREAM)
3003     || (strand != Seq_strand_minus
3004       && adjacent_type == ADJACENT_TYPE_DOWNSTREAM))
3005   {
3006     a_end = GetOffsetInBioseq (a, bsp, SEQLOC_RIGHT_END);
3007     b_end = GetOffsetInBioseq (b, bsp, SEQLOC_LEFT_END);
3008     if ((allow_interval && b_end < a_end)
3009       || b_end == a_end + 1)
3010     {
3011       return TRUE;
3012     }
3013     else
3014     {
3015       return FALSE;
3016     }
3017   }
3018   else
3019   {
3020     a_end = GetOffsetInBioseq (a, bsp, SEQLOC_LEFT_END);
3021     b_end = GetOffsetInBioseq (b, bsp, SEQLOC_RIGHT_END);
3022     if ((allow_interval && b_end > a_end)
3023       || a_end == b_end + 1)
3024     {
3025       return TRUE;
3026     }
3027     else
3028     {
3029       return FALSE;
3030     }
3031   }
3032 }
3033 
IsAEmptyIntervalOfB(SeqLocPtr a,SeqLocPtr b,BioseqPtr bsp)3034 static Boolean IsAEmptyIntervalOfB (SeqLocPtr a, SeqLocPtr b, BioseqPtr bsp)
3035 {
3036   Int4 a_right, a_left, b_right, b_left, prev_right, prev_left;
3037   SeqLocPtr slp;
3038   Uint1 a_strand, b_strand;
3039 
3040   if (a == NULL || b == NULL || bsp == NULL) return FALSE;
3041 
3042   a_strand = SeqLocStrand (a);
3043   b_strand = SeqLocStrand (b);
3044   if ((a_strand == Seq_strand_minus && b_strand != Seq_strand_minus)
3045       || (a_strand != Seq_strand_minus && b_strand == Seq_strand_minus)) {
3046       return FALSE;
3047   }
3048 
3049   a_right = GetOffsetInBioseq (a, bsp, SEQLOC_RIGHT_END);
3050   a_left = GetOffsetInBioseq (a, bsp, SEQLOC_LEFT_END);
3051 
3052   slp = SeqLocFindNext (b, NULL);
3053   prev_right = GetOffsetInBioseq (slp, bsp, SEQLOC_RIGHT_END);
3054   prev_left = GetOffsetInBioseq (slp, bsp, SEQLOC_LEFT_END);
3055   slp = SeqLocFindNext (b, slp);
3056   while (slp != NULL) {
3057     b_right = GetOffsetInBioseq (slp, bsp, SEQLOC_RIGHT_END);
3058     b_left = GetOffsetInBioseq (slp, bsp, SEQLOC_LEFT_END);
3059     if (a_left == prev_right + 1 && a_right == b_left - 1) {
3060       return TRUE;
3061     } else if (a_left == b_right + 1 && a_right == prev_left - 1) {
3062       return TRUE;
3063     } else {
3064       prev_right = b_right;
3065       prev_left = b_left;
3066       slp = SeqLocFindNext (b, slp);
3067     }
3068   }
3069   return FALSE;
3070 }
3071 
3072 
3073 //LCOV_EXCL_START
3074 //Due to logic error, this function is never called
LocAContainsIntervalOfB(SeqLocPtr a,SeqLocPtr b)3075 static Boolean LocAContainsIntervalOfB (SeqLocPtr a, SeqLocPtr b)
3076 {
3077   SeqLocPtr interval;
3078   Boolean   rval = FALSE;
3079 
3080   if (a == NULL || b == NULL) return FALSE;
3081 
3082   interval = SeqLocFindNext (b, NULL);
3083   while (interval != NULL && !rval) {
3084     if (IsLocAInBonSameStrand (interval, a)) {
3085       rval = TRUE;
3086     } else {
3087       interval = SeqLocFindNext (b, interval);
3088     }
3089   }
3090   return rval;
3091 }
3092 //LCOV_EXCL_STOP
3093 
3094 
3095 /* This section of code deals with identifying and labeling features
3096  * for the definition line.
3097  * The features currently handled are:
3098  *     genes
3099  *     exons
3100  *     introns
3101  *     LTRs
3102  *     3' UTRs
3103  *     5' UTRs
3104  *     CDSs
3105  *     rRNA
3106  *     mRNA
3107  *     misc RNA
3108  *     snRNA
3109  *     snoRNA
3110  *     insertion sequences
3111  *     integrons
 *     D-loops
3114  *     tRNA
3115  *     control regions
3116  *     misc feature listed as intergenic spacer in comment
3117  *     satellite sequences
3118  *     promoter regions
3119  *     endogenous virus source features
3120  *     transposons
3121  */
3122 
IsGene(SeqFeatPtr sfp)3123 static Boolean LIBCALLBACK IsGene (
3124   SeqFeatPtr sfp
3125 )
3126 {
3127   if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return FALSE;
3128   return TRUE;
3129 }
3130 
GetGeneName(GeneRefPtr grp,Boolean suppress_locus_tag)3131 static CharPtr GetGeneName (GeneRefPtr grp, Boolean suppress_locus_tag)
3132 {
3133   ValNodePtr syn;
3134 
3135   if (grp == NULL) return NULL;
3136   if (SeqMgrGeneIsSuppressed (grp)) return NULL;
3137   if (StringDoesHaveText (grp->locus)) return grp->locus;
3138   if (! suppress_locus_tag && StringDoesHaveText (grp->locus_tag))
3139       return grp->locus_tag;
3140   if (StringDoesHaveText (grp->desc)) return grp->desc;
3141   for (syn = grp->syn; syn != NULL; syn = syn->next)
3142   {
3143     if (syn != NULL && syn->data.ptrvalue != NULL)
3144       return syn->data.ptrvalue;
3145   }
3146   return NULL;
3147 }
3148 
GetAlleleName(GeneRefPtr grp,Boolean suppress_locus_tag)3149 static CharPtr GetAlleleName (GeneRefPtr grp, Boolean suppress_locus_tag)
3150 {
3151   size_t  lenallele;
3152   size_t  lengenename;
3153   CharPtr  gene_name;
3154   CharPtr  buffer;
3155 
3156   if (grp == NULL) return NULL;
3157   if (StringHasNoText (grp->allele)) return NULL;
3158   gene_name = GetGeneName (grp, suppress_locus_tag);
3159   if (StringHasNoText (gene_name)) return NULL;
3160   lenallele = StringLen (grp->allele);
3161   lengenename = StringLen (gene_name);
3162 
3163   if (lenallele > lengenename
3164     && StringNICmp (gene_name, grp->allele, lengenename) == 0)
3165   {
3166     return StringSave (grp->allele);
3167   }
3168   else if (grp->allele[0] == '-')
3169   {
3170     buffer = MemNew (lenallele + lengenename + 1);
3171     if (buffer == NULL) return NULL;
3172     StringCpy (buffer, gene_name);
3173     StringCat (buffer, grp->allele);
3174   }
3175   else
3176   {
3177     buffer = MemNew (lenallele + lengenename + 2);
3178     if (buffer == NULL) return NULL;
3179     StringCpy (buffer, gene_name);
3180     StringCat (buffer, "-");
3181     StringCat (buffer, grp->allele);
3182   }
3183 
3184   return buffer;
3185 }
3186 
3187 /* This function compares the gene names and allele names of the gene
3188  * to see if they match.
3189  */
DoGenesMatch(GeneRefPtr grp1,GeneRefPtr grp2,Boolean suppress_locus_tag)3190 static Boolean DoGenesMatch
3191 (GeneRefPtr grp1,
3192  GeneRefPtr grp2,
3193  Boolean suppress_locus_tag)
3194 {
3195   CharPtr name1;
3196   CharPtr name2;
3197 
3198   name1 = GetGeneName (grp1, suppress_locus_tag);
3199   name2 = GetGeneName (grp2, suppress_locus_tag);
3200   if (StringCmp (name1, name2) != 0) return FALSE;
3201 
3202   name1 = GetAlleleName (grp1, suppress_locus_tag);
3203   name2 = GetAlleleName (grp2, suppress_locus_tag);
3204   if ((name1 == NULL && name2 != NULL)
3205     || (name1 != NULL && name2 == NULL))
3206   {
3207     if (name1 != NULL) MemFree (name1);
3208     if (name2 != NULL) MemFree (name2);
3209     return FALSE;
3210   }
3211 
3212   if ((name1 == NULL && name2 == NULL)
3213            || (StringCmp (name1, name2) == 0))
3214   {
3215     if (name1 != NULL) MemFree (name1);
3216     if (name2 != NULL) MemFree (name2);
3217     return TRUE;
3218   }
3219 
3220   if (name1 != NULL) MemFree (name1);
3221   if (name2 != NULL) MemFree (name2);
3222   return  FALSE;
3223 }
3224 
3225 /* This function looks at the pseudo flag on the object itself as well as
3226  * the pseudo flag on the gene reference for the object (if one is present).
3227  */
IsPseudo(SeqFeatPtr sfp)3228 NLM_EXTERN Boolean IsPseudo (
3229   SeqFeatPtr sfp
3230 )
3231 {
3232   GeneRefPtr grp;
3233   SeqMgrFeatContext context;
3234 
3235   if (sfp == NULL) return FALSE;
3236   if (sfp->pseudo) return TRUE;
3237   if (sfp->data.choice == SEQFEAT_GENE)
3238   {
3239     grp = sfp->data.value.ptrvalue;
3240   }
3241   else
3242   {
3243     grp = SeqMgrGetGeneXref (sfp);
3244   }
3245   if (grp == NULL)
3246   {
3247     if (sfp->data.choice != SEQFEAT_GENE) {
3248       sfp = SeqMgrGetOverlappingGene(sfp->location, &context);
3249       return IsPseudo(sfp);
3250     } else {
3251       return FALSE;
3252     }
3253   } else {
3254     return grp->pseudo;
3255   }
3256 }
3257 
IsExon(SeqFeatPtr sfp)3258 static Boolean LIBCALLBACK IsExon (
3259   SeqFeatPtr sfp
3260 )
3261 {
3262   if (sfp == NULL || sfp->idx.subtype != FEATDEF_exon) return FALSE;
3263   return TRUE;
3264 }
3265 
IsIntron(SeqFeatPtr sfp)3266 static Boolean LIBCALLBACK IsIntron (
3267   SeqFeatPtr sfp
3268 )
3269 {
3270   if (sfp == NULL || sfp->idx.subtype != FEATDEF_intron) return FALSE;
3271   return TRUE;
3272 }
3273 
IsExonOrIntron(SeqFeatPtr sfp)3274 static Boolean LIBCALLBACK IsExonOrIntron (SeqFeatPtr sfp)
3275 {
3276   return IsExon(sfp) || IsIntron(sfp);
3277 }
3278 
IsLTR(SeqFeatPtr sfp)3279 static Boolean LIBCALLBACK IsLTR (
3280   SeqFeatPtr sfp
3281 )
3282 {
3283   GBQualPtr gb;
3284 
3285   if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
3286   for (gb = sfp->qual; gb != NULL; gb = gb->next) {
3287       if (StringICmp(gb->qual, "rpt_type") == 0 && StringISearch(gb->val, "long_terminal_repeat") != NULL) {
3288           return TRUE;
3289       }
3290   }
3291 
3292   return FALSE;
3293 }
3294 
GetLTRDescription(SeqFeatPtr sfp)3295 static CharPtr GetLTRDescription (
3296   SeqFeatPtr sfp
3297 )
3298 {
3299   CharPtr description;
3300   size_t comment_len;
3301   if (sfp == NULL) return NULL;
3302   if (sfp->comment == NULL) return NULL;
3303   comment_len = StringLen (sfp->comment);
3304   if (comment_len > 3 && StringCmp (sfp->comment + comment_len - 3, "LTR") == 0)
3305   {
3306     description = (CharPtr) MemNew (comment_len - 3);
3307     if (description == NULL) return NULL;
3308     StringNCpy (description, sfp->comment, comment_len - 4);
3309     description[comment_len - 4] = 0;
3310   }
3311   else
3312   {
3313     description = StringSave (sfp->comment);
3314   }
3315   return description;
3316 }
3317 
Is3UTR(SeqFeatPtr sfp)3318 static Boolean LIBCALLBACK Is3UTR (
3319   SeqFeatPtr sfp
3320 )
3321 {
3322   if (sfp == NULL || sfp->idx.subtype != FEATDEF_3UTR) return FALSE;
3323   return TRUE;
3324 }
3325 
Is5UTR(SeqFeatPtr sfp)3326 static Boolean LIBCALLBACK Is5UTR (
3327   SeqFeatPtr sfp
3328 )
3329 {
3330   if (sfp == NULL || sfp->idx.subtype != FEATDEF_5UTR) return FALSE;
3331   return TRUE;
3332 }
3333 
IsCDS(SeqFeatPtr sfp)3334 static Boolean LIBCALLBACK IsCDS (SeqFeatPtr sfp)
3335 {
3336   if (sfp == NULL) return FALSE;
3337   if (sfp->data.choice == SEQFEAT_CDREGION)
3338     return TRUE;
3339   return FALSE;
3340 }
3341 
3342 
IsuORF(SeqFeatPtr sfp)3343 static Boolean LIBCALLBACK IsuORF (SeqFeatPtr sfp)
3344 {
3345   SeqMgrFeatContext context;
3346   CharPtr           cp;
3347   Int4              len;
3348 
3349   if (sfp == NULL) return FALSE;
3350   if (sfp->data.choice == SEQFEAT_CDREGION
3351       && sfp->product != NULL
3352       && SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &context) == sfp) {
3353     cp = StringStr (context.label, "uORF");
3354     if (cp != NULL && (cp == context.label || *(cp - 1) == ' ')
3355         && (*(cp + 4) == 0 || isspace (*(cp + 4)) || isdigit (*(cp + 4)))) {
3356       return TRUE;
3357     }
3358     if ((len = StringLen (context.label)) >= 14
3359         && StringCmp (context.label + len - 14, "leader peptide") == 0) {
3360       return TRUE;
3361     }
3362   }
3363   return FALSE;
3364 }
3365 
3366 
IsrRNA(SeqFeatPtr sfp)3367 static Boolean LIBCALLBACK IsrRNA (
3368   SeqFeatPtr sfp
3369 )
3370 {
3371   if (sfp == NULL || sfp->idx.subtype != FEATDEF_rRNA) return FALSE;
3372   return TRUE;
3373 }
3374 
IsMiscRNA(SeqFeatPtr sfp)3375 static Boolean LIBCALLBACK IsMiscRNA (
3376   SeqFeatPtr sfp
3377 )
3378 {
3379   if (sfp == NULL
3380     || (sfp->idx.subtype != FEATDEF_misc_RNA
3381       && sfp->idx.subtype != FEATDEF_otherRNA))
3382   {
3383     return FALSE;
3384   }
3385   return TRUE;
3386 }
3387 
IsncRNA(SeqFeatPtr sfp)3388 static Boolean LIBCALLBACK IsncRNA (
3389   SeqFeatPtr sfp
3390 )
3391 {
3392   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return FALSE;
3393   if (sfp->idx.subtype == FEATDEF_scRNA
3394       || sfp->idx.subtype == FEATDEF_snRNA
3395       || sfp->idx.subtype == FEATDEF_snoRNA
3396       || sfp->idx.subtype == FEATDEF_ncRNA
3397       || sfp->idx.subtype == FEATDEF_tmRNA
3398       || sfp->idx.subtype == FEATDEF_misc_RNA
3399       || sfp->idx.subtype == FEATDEF_otherRNA)
3400   {
3401     return TRUE;
3402   }
3403   else
3404   {
3405     return FALSE;
3406   }
3407 }
3408 
3409 
GetncRNAProduct(SeqFeatPtr sfp,Boolean use_ncrna_note)3410 static CharPtr GetncRNAProduct (SeqFeatPtr sfp, Boolean use_ncrna_note)
3411 {
3412   GBQualPtr gbq = NULL;
3413   CharPtr product = NULL, q_class = NULL, q_product = NULL;
3414   CharPtr tmp_class = NULL, cp;
3415   RnaRefPtr rrp;
3416   RNAGenPtr rgp;
3417 
3418   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA)
3419   {
3420     return NULL;
3421   }
3422   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
3423 
3424   if (rrp->ext.choice == 3) {
3425     rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
3426     if (rgp != NULL) {
3427       q_class = rgp->_class;
3428       q_product = rgp->product;
3429     }
3430   } else {
3431     gbq = sfp->qual;
3432     while (gbq != NULL && q_product == NULL) {
3433       if (StringICmp (gbq->qual, "ncRNA_class") == 0
3434           && !StringHasNoText (gbq->val)) {
3435         q_class = gbq->val;
3436       } else if (StringICmp (gbq->qual, "product") == 0
3437           && !StringHasNoText (gbq->val)) {
3438         q_product = gbq->val;
3439       }
3440       gbq = gbq->next;
3441     }
3442   }
3443 
3444   if (q_class != NULL) {
3445     tmp_class = StringSave (q_class);
3446     cp = tmp_class;
3447     while (*cp != 0) {
3448       if (*cp == '_') {
3449         *cp = ' ';
3450       }
3451       cp++;
3452     }
3453   }
3454   if (q_product != NULL) {
3455     if (tmp_class == NULL
3456         || StringStr (q_product, tmp_class) != NULL
3457         || StringCmp (tmp_class, "other") == 0
3458         || StringCmp (tmp_class, "RNase P RNA") == 0) {
3459       product = StringSave (q_product);
3460     } else {
3461       product = (CharPtr) MemNew (sizeof (Char) * (StringLen (q_product) + StringLen (tmp_class) + 2));
3462       sprintf (product, "%s %s", q_product, tmp_class);
3463     }
3464   } else if (q_class != NULL) {
3465     if (use_ncrna_note && !StringHasNoText (sfp->comment)) {
3466       product = StringSave (sfp->comment);
3467     } else if (StringCmp (tmp_class, "other") == 0) {
3468       product = StringSave ("non-coding RNA");
3469     } else {
3470       product = StringSave (tmp_class);
3471     }
3472   } else if ((use_ncrna_note ||
3473               (rrp != NULL && rrp->type == 10))
3474              && !StringHasNoText (sfp->comment)) {
3475     product = StringSave (sfp->comment);
3476   } else if (sfp->idx.subtype == FEATDEF_tmRNA) {
3477     product = StringSave ("tmRNA");
3478   } else {
3479     product = StringSave ("non-coding RNA");
3480   }
3481   tmp_class = MemFree (tmp_class);
3482   cp = StringChr (product, ';');
3483   if (cp != NULL) {
3484     *cp = 0;
3485   }
3486   return product;
3487 }
3488 
3489 
IsPrecursorRNA(SeqFeatPtr sfp)3490 static Boolean LIBCALLBACK IsPrecursorRNA (SeqFeatPtr sfp)
3491 {
3492   if (sfp == NULL || sfp->idx.subtype != FEATDEF_preRNA) return FALSE;
3493   return TRUE;
3494 }
3495 
3496 
/* Keywords that may start the value of a mobile element qualifier.
 * The position of an entry in this table is used as its keyword index
 * and corresponds to the constants of enum mobile_element_keyword_nums,
 * so the two lists must stay in the same order.
 */
static CharPtr mobile_element_keywords [] = {
  "insertion sequence",
  "retrotransposon",
  "non-LTR retrotransposon",
  "transposon",
  "integron",
  "other",
  "SINE",
  "MITE",
  "LINE"
};
3508 
/* Indices into mobile_element_keywords, listed in the same order as the
 * strings of that table (starting at 0).
 */
enum mobile_element_keyword_nums
{
  eMobileElementInsertionSequence = 0,
  eMobileElementRetrotransposon,
  eMobileElementNonLTRRetrotransposon,
  eMobileElementTransposon,
  eMobileElementIntegron,
  eMobileElementOther,
  eMobileElementSINE,
  eMobileElementMITE,
  eMobileElementLINE
};
3521 
StartsWithMobileElementKeyword(CharPtr txt)3522 static Int4 StartsWithMobileElementKeyword (CharPtr txt)
3523 {
3524   Int4 i, keyword_len;
3525 
3526   for (i=0; i < sizeof (mobile_element_keywords) / sizeof (CharPtr); i++) {
3527     keyword_len = StringLen (mobile_element_keywords[i]);
3528     if (StringNCmp (txt, mobile_element_keywords[i], keyword_len) == 0
3529         && (*(txt + keyword_len) == ':' || *(txt + keyword_len) == 0)) {
3530       return i;
3531     }
3532   }
3533   return -1;
3534 }
3535 
IsMobileElementGBQual(GBQualPtr gbqual)3536 static Int4 IsMobileElementGBQual (GBQualPtr gbqual)
3537 {
3538   Int4 keyword_idx;
3539   if (gbqual == NULL || gbqual->qual == NULL || gbqual->val == NULL) return -1;
3540   if (StringCmp (gbqual->qual, "mobile_element") != 0 && StringCmp (gbqual->qual, "mobile_element_type") != 0) return -1;
3541   keyword_idx = StartsWithMobileElementKeyword (gbqual->val);
3542   if (keyword_idx < 0) return -1;
3543   if (keyword_idx == eMobileElementOther
3544       && StringStr (gbqual->val, "transposable element") == NULL
3545       && StringStr (gbqual->val, "P element") == NULL) {
3546     return -1;
3547   } else {
3548     return keyword_idx;
3549   }
3550 }
3551 
3552 
FeatureDoesNotGetPartialComplete(SeqFeatPtr sfp)3553 static Boolean FeatureDoesNotGetPartialComplete (SeqFeatPtr sfp)
3554 {
3555   GBQualPtr gbqual;
3556   Int4 keyword_idx;
3557 
3558   if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
3559 
3560   for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
3561   {
3562     keyword_idx = IsMobileElementGBQual(gbqual);
3563     if (keyword_idx == eMobileElementSINE
3564         || keyword_idx == eMobileElementLINE) {
3565       return TRUE;
3566     }
3567   }
3568   return FALSE;
3569 }
3570 
3571 
IsMobileElement(SeqFeatPtr sfp)3572 NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp)
3573 {
3574   GBQualPtr gbqual;
3575   if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE;
3576 
3577   if (sfp->idx.subtype == FEATDEF_repeat_region) {
3578       for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
3579       {
3580           if (IsMobileElementGBQual(gbqual) > -1) {
3581               return TRUE;
3582           }
3583       }
3584       return FALSE;
3585   } else {
3586       return TRUE;
3587   }
3588 }
3589 
IsRemovableMobileElement(SeqFeatPtr sfp)3590 static Boolean LIBCALLBACK IsRemovableMobileElement (SeqFeatPtr sfp)
3591 {
3592   GBQualPtr gbqual;
3593   Int4 keyword_idx;
3594   if (sfp == NULL || (sfp->idx.subtype != FEATDEF_repeat_region && sfp->idx.subtype != FEATDEF_mobile_element)) return FALSE;
3595 
3596   for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
3597   {
3598     keyword_idx = IsMobileElementGBQual(gbqual);
3599     if (keyword_idx >= eMobileElementSINE
3600         && keyword_idx <= eMobileElementLINE) {
3601       return TRUE;
3602     }
3603   }
3604   return FALSE;
3605 }
3606 
/* Chooses the type word for a mobile element.
 *   keyword_idx < 0            - NULL
 *   desc_start has no text     - the table keyword for keyword_idx
 *   keyword "other"            - "transposable element"
 *   keyword "transposon"       - "P-element" or "MITE" when desc_start
 *                                contains that text (checked in this
 *                                order), else the table keyword
 *   keyword "integron"         - "superintegron" when desc_start contains
 *                                it, else the table keyword
 *   any other keyword          - the table keyword
 * The returned pointer refers to a string literal or a table entry; it is
 * not allocated here.
 */
static CharPtr GetMobileElementTypeword (CharPtr desc_start, Int4 keyword_idx)
{
  if (keyword_idx < 0) return NULL;

  if (StringHasNoText (desc_start)) {
    return mobile_element_keywords[keyword_idx];
  }

  if (keyword_idx == eMobileElementOther) {
    return "transposable element";
  }

  if (keyword_idx == eMobileElementTransposon) {
    if (StringStr (desc_start, "P-element") != NULL) {
      return "P-element";
    }
    if (StringStr (desc_start, "MITE") != NULL) {
      return "MITE";
    }
  } else if (keyword_idx == eMobileElementIntegron) {
    if (StringStr (desc_start, "superintegron") != NULL) {
      return "superintegron";
    }
  }

  return mobile_element_keywords[keyword_idx];
}
3638 
3639 
GetMobileElementFeatureLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)3640 static void LIBCALLBACK GetMobileElementFeatureLabel (
3641   ValNodePtr      featlist,
3642   BioseqPtr       bsp,
3643   Uint1           biomol,
3644   FeatureLabelPtr flp
3645 )
3646 {
3647   GBQualPtr  gbqual;
3648   Int4    keyword_idx = -1;
3649   Int4    keyword_len;
3650   Int4    val_len;
3651   SeqFeatPtr      sfp;
3652   CharPtr         desc_start = NULL, typeword, cp;
3653 
3654   flp->pluralizable = TRUE;
3655   flp->is_typeword_first = FALSE;
3656   flp->typeword = NULL;
3657   flp->description = NULL;
3658 
3659   if (featlist == NULL) return;
3660   sfp = featlist->data.ptrvalue;
3661   if (sfp == NULL) return;
3662 
3663   gbqual = sfp->qual;
3664   while (gbqual != NULL
3665          && (keyword_idx = IsMobileElementGBQual(gbqual)) < 0)
3666   {
3667     gbqual = gbqual->next;
3668   }
3669   if (gbqual == NULL) return;
3670 
3671   keyword_len = StringLen (mobile_element_keywords[keyword_idx]);
3672   desc_start = gbqual->val + keyword_len;
3673   while (isspace (*desc_start) || *desc_start == ':') {
3674     desc_start++;
3675   }
3676 
3677   /* find alternate typewords */
3678   typeword = GetMobileElementTypeword(desc_start, keyword_idx);
3679   if (typeword == NULL) return;
3680   keyword_len = StringLen (typeword);
3681 
3682   flp->typeword = StringSave (typeword);
3683   val_len = StringLen (desc_start);
3684 
3685   if (StringHasNoText (desc_start))
3686   {
3687     flp->is_typeword_first = FALSE;
3688     flp->description = NULL;
3689   } else if (StringCmp (desc_start, typeword) == 0) {
3690     /* just the keyword */
3691     flp->is_typeword_first = FALSE;
3692     flp->description = NULL;
3693     return;
3694   } else if (StringNCmp (desc_start, typeword, keyword_len) == 0) {
3695     /* starts with keyword */
3696     /* if keyword is hyphenated portion of name, no pluralization */
3697     if (desc_start[keyword_len] == '-') {
3698       flp->description = StringSave (desc_start);
3699       flp->typeword = MemFree (flp->typeword);
3700       flp->typeword = StringSave ("");
3701       flp->pluralizable = FALSE;
3702     } else {
3703       flp->is_typeword_first = TRUE;
3704       flp->description = StringSave (desc_start + keyword_len + 1);
3705     }
3706     return;
3707   } else if (val_len > 8 && StringCmp (desc_start + val_len - keyword_len, typeword) == 0
3708              && val_len - keyword_len - 1 >= 0
3709              && isspace (*(desc_start + val_len - keyword_len - 1))) {
3710     /* ends with keyword */
3711     flp->is_typeword_first = FALSE;
3712     flp->description = MemNew (val_len - keyword_len);
3713     if (flp->description == NULL) return;
3714     StringNCpy (flp->description, desc_start, val_len - keyword_len - 1);
3715     flp->description[val_len - keyword_len -1] = 0;
3716   } else if ((cp = StringStr (desc_start, typeword)) != NULL
3717              && cp != desc_start
3718              && isspace (*(cp -1))) {
3719     /* keyword in the middle */
3720     flp->description = StringSave (desc_start);
3721     flp->typeword = MemFree (flp->typeword);
3722     flp->typeword = StringSave ("");
3723     flp->pluralizable = FALSE;
3724   } else {
3725     /* keyword not in description */
3726     if (StringICmp (flp->typeword, "integron") == 0) {
3727       flp->is_typeword_first = FALSE;
3728     } else {
3729       flp->is_typeword_first = TRUE;
3730     }
3731     flp->description = StringSave (desc_start);
3732     if (StringCmp (flp->description, "") == 0) {
3733       flp->is_typeword_first = FALSE;
3734     }
3735   }
3736   if (StringCmp (flp->description, "unnamed") == 0) {
3737     flp->description = MemFree (flp->description);
3738   }
3739 }
3740 
3741 
IsEndogenousVirusSequence(SeqFeatPtr sfp)3742 static Boolean LIBCALLBACK IsEndogenousVirusSequence (
3743   SeqFeatPtr sfp
3744 )
3745 {
3746   GBQualPtr gbqual;
3747   if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) return FALSE;
3748 
3749   for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
3750   {
3751     if (StringCmp (gbqual->qual, "endogenous_virus") == 0)
3752       return TRUE;
3753   }
3754   return FALSE;
3755 }
3756 
GetEndogenousVirusSequenceDescription(SeqFeatPtr sfp)3757 static CharPtr GetEndogenousVirusSequenceDescription (
3758   SeqFeatPtr sfp
3759 )
3760 {
3761   GBQualPtr gbqual;
3762 
3763   if (sfp == NULL) return NULL;
3764 
3765   gbqual = sfp->qual;
3766   while (gbqual != NULL && StringCmp (gbqual->qual, "endogenous_virus") != 0)
3767   {
3768     gbqual = gbqual->next;
3769   }
3770   if (gbqual != NULL)
3771   {
3772     if (StringDoesHaveText (gbqual->val)
3773       && StringCmp (gbqual->val, "unnamed") != 0)
3774     {
3775       return StringSave (gbqual->val);
3776     }
3777   }
3778   return NULL;
3779 }
3780 
IsDloop(SeqFeatPtr sfp)3781 static Boolean LIBCALLBACK IsDloop (
3782   SeqFeatPtr sfp
3783 )
3784 {
3785   if (sfp == NULL || sfp->idx.subtype != FEATDEF_D_loop) return FALSE;
3786   return TRUE;
3787 }
3788 
IsmRNA(SeqFeatPtr sfp)3789 static Boolean LIBCALLBACK IsmRNA (
3790   SeqFeatPtr sfp
3791 )
3792 {
3793   if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA) return FALSE;
3794   return TRUE;
3795 }
3796 
IstRNA(SeqFeatPtr sfp)3797 static Boolean LIBCALLBACK IstRNA (
3798   SeqFeatPtr sfp
3799 )
3800 {
3801   if (sfp == NULL || sfp->idx.subtype != FEATDEF_tRNA) return FALSE;
3802   return TRUE;
3803 }
3804 
IsControlRegion(SeqFeatPtr sfp)3805 static Boolean LIBCALLBACK IsControlRegion (
3806   SeqFeatPtr sfp
3807 )
3808 {
3809   if (sfp == NULL
3810     || sfp->idx.subtype != FEATDEF_misc_feature
3811     || sfp->comment == NULL
3812     || StringNCmp (sfp->comment, "control region", StringLen ("control region")) != 0)
3813   {
3814     return FALSE;
3815   }
3816   return TRUE;
3817 }
3818 
IsGeneCluster(SeqFeatPtr sfp)3819 static Boolean LIBCALLBACK IsGeneCluster (
3820   SeqFeatPtr sfp
3821 )
3822 {
3823   if (sfp == NULL
3824     || sfp->idx.subtype != FEATDEF_misc_feature
3825     || sfp->comment == NULL
3826     || (StringStr (sfp->comment, "gene cluster") == NULL
3827         && StringStr (sfp->comment, "gene locus") == NULL))
3828   {
3829     return FALSE;
3830   }
3831   return TRUE;
3832 }
3833 
3834 
GetGeneClusterFeatureLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)3835 static void LIBCALLBACK GetGeneClusterFeatureLabel (
3836   ValNodePtr      featlist,
3837   BioseqPtr       bsp,
3838   Uint1           biomol,
3839   FeatureLabelPtr flp
3840 )
3841 {
3842   SeqFeatPtr main_feat;
3843   CharPtr    cp;
3844   Int4       datalen;
3845 
3846   if (featlist == NULL || featlist->data.ptrvalue == NULL) return;
3847   main_feat = featlist->data.ptrvalue;
3848   if (StringHasNoText (main_feat->comment)) return;
3849   cp = StringStr (main_feat->comment, "gene cluster");
3850   if (cp == NULL)
3851   {
3852     cp = StringStr (main_feat->comment, "gene locus");
3853     if (cp == NULL) return;
3854     flp->typeword = StringSave ("gene locus");
3855   }
3856   else
3857   {
3858     flp->typeword = StringSave ("gene cluster");
3859   }
3860   flp->pluralizable = FALSE;
3861   flp->is_typeword_first = FALSE;
3862   datalen = cp - main_feat->comment;
3863   if (datalen > 0)
3864   {
3865     flp->description = (CharPtr) MemNew ((datalen + 1) * sizeof (Char));
3866     StringNCpy (flp->description, main_feat->comment, datalen);
3867     flp->description [datalen] = 0;
3868     TrimSpacesAroundString (flp->description);
3869   }
3870   else
3871   {
3872     flp->description = NULL;
3873   }
3874 }
3875 
3876 
IsIntergenicSpacer(SeqFeatPtr sfp)3877 static Boolean LIBCALLBACK IsIntergenicSpacer (
3878   SeqFeatPtr sfp
3879 )
3880 {
3881   if (sfp == NULL
3882 || sfp->idx.subtype != FEATDEF_misc_feature
3883 || sfp->comment == NULL
3884 || StringStr(sfp->comment, "intergenic spacer") == NULL)
3885 {
3886     return FALSE;
3887 }
3888 return TRUE;
3889 }
3890 
3891 static ValNodePtr GettRNAGenesAndSpacers(CharPtr str);
3892 static ValNodePtr FreetRNAGenesAndSpacersList(ValNodePtr list);
3893 
IsParsableList(SeqFeatPtr sfp)3894 static Boolean LIBCALLBACK IsParsableList(
3895     SeqFeatPtr sfp
3896     )
3897 {
3898     ValNodePtr list;
3899 
3900     if (sfp == NULL
3901         || sfp->idx.subtype != FEATDEF_misc_feature
3902         || sfp->comment == NULL)
3903     {
3904         return FALSE;
3905     }
3906 
3907     list = GettRNAGenesAndSpacers(sfp->comment);
3908     if (list == NULL)
3909     {
3910         return FALSE;
3911     }
3912     else
3913     {
3914         FreetRNAGenesAndSpacersList(list);
3915         return TRUE;
3916     }
3917 }
3918 
3919 
3920 /* This function produces the default definition line label for a misc_feature
3921  * that has the word "intergenic spacer" in the comment.  If the comment starts
3922  * with the word "contains", "contains" is ignored.  If "intergenic spacer"
3923  * appears first in the comment (or first after the word "contains", the text
3924  * after the words "intergenic spacer" but before the first semicolon (if any)
3925  * appear after the words "intergenic spacer" in the definition line.  If there
3926  * are words after "contains" or at the beginning of the comment before the words
3927  * "intergenic spacer", this text will appear in the definition line before the words
3928  * "intergenic spacer".
3929  */
GetIntergenicSpacerFeatureLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)3930 static void LIBCALLBACK GetIntergenicSpacerFeatureLabel(
3931     ValNodePtr      featlist,
3932     BioseqPtr       bsp,
3933     Uint1           biomol,
3934     FeatureLabelPtr flp
3935     )
3936 {
3937     SeqFeatPtr main_feat;
3938     CharPtr    cp, buffer;
3939     Int4       datalen, offset;
3940 
3941     if (featlist == NULL || featlist->data.ptrvalue == NULL) return;
3942     main_feat = featlist->data.ptrvalue;
3943     if (StringHasNoText(main_feat->comment)) return;
3944     if (StringNCmp(main_feat->comment, "contains ", 9) == 0)
3945     {
3946         buffer = main_feat->comment + 9;
3947     }
3948     else if (StringNCmp(main_feat->comment, "may contain ", 12) == 0)
3949     {
3950         buffer = main_feat->comment + 12;
3951     }
3952     else
3953     {
3954         buffer = main_feat->comment;
3955     }
3956     cp = StringStr(buffer, "intergenic spacer");
3957     if (cp == NULL) return;
3958     flp->typeword = StringSave("intergenic spacer");
3959     flp->pluralizable = FALSE;
3960     if (cp == buffer)
3961     {
3962         flp->is_typeword_first = TRUE;
3963         offset = StringLen("intergenic spacer") + 1;
3964         if (StringNCmp(cp + offset, "and ", 4) == 0
3965             || *(cp + StringLen("intergenic spacer")) == 0)
3966         {
3967             flp->description = NULL;
3968         }
3969         else
3970         {
3971             flp->description = StringSave(cp + StringLen("intergenic spacer") + 1);
3972             cp = StringChr(flp->description, ';');
3973             if (cp != NULL)
3974             {
3975                 *cp = 0;
3976             }
3977         }
3978     }
3979     else
3980     {
3981       flp->is_typeword_first = FALSE;
3982       if (StringCmp(cp + StringLen(flp->typeword), " region") == 0) {
3983         flp->typeword = MemFree(flp->typeword);
3984         flp->typeword = StringSave("intergenic spacer region");
3985       }
3986     datalen = cp - buffer;
3987     flp->description = MemNew ( datalen + 1);
3988     if (flp->description == NULL) return;
3989     StringNCpy (flp->description, buffer, datalen);
3990     flp->description [datalen] = 0;
3991     TrimSpacesAroundString (flp->description);
3992   }
3993 }
3994 
3995 /* These structures are used for parsing tRNA and intergenic spacer information
3996  * from misc_feature comments.
3997  */
3998 typedef struct commentfeat
3999 {
4000   CharPtr product_name;
4001   CharPtr gene_name;
4002 } CommentFeatData, PNTR CommentFeatPtr;
4003 
4004 
/* Releases a CommentFeatData together with both of its strings.
 * Accepts NULL.  Returns the result of the final MemFree so callers can
 * assign it back to the pointer they are clearing.
 */
static CommentFeatPtr CommentFeatFree (CommentFeatPtr cfp)
{
  if (cfp == NULL) {
    return cfp;
  }
  cfp->product_name = MemFree (cfp->product_name);
  cfp->gene_name = MemFree (cfp->gene_name);
  cfp = MemFree (cfp);
  return cfp;
}
4014 
4015 
/* Frees a ValNode list whose nodes each hold a CommentFeatPtr, releasing
 * every payload and every node.  Always returns NULL.
 */
static ValNodePtr CommentFeatListFree (ValNodePtr vnp)
{
    ValNodePtr following;

    for (; vnp != NULL; vnp = following) {
        following = vnp->next;
        /* detach first so ValNodeFree only sees this single node */
        vnp->next = NULL;
        vnp->data.ptrvalue = CommentFeatFree ((CommentFeatPtr)(vnp->data.ptrvalue));
        ValNodeFree (vnp);
    }
    return vnp;
}
4029 
4030 
4031 typedef struct intergenicspacerdef
4032 {
4033   CharPtr first_gene;
4034   CharPtr second_gene;
4035 } IntergenicSpacerDefData, PNTR IntergenicSpacerDefPtr;
4036 
/* Releases an IntergenicSpacerDefData together with both gene names.
 * Accepts NULL.  Returns the result of the final MemFree so callers can
 * assign it back to the pointer they are clearing.
 */
static IntergenicSpacerDefPtr IntergenicSpacerDefFree (IntergenicSpacerDefPtr ip)
{
  if (ip == NULL) {
    return ip;
  }
  ip->first_gene = MemFree (ip->first_gene);
  ip->second_gene = MemFree (ip->second_gene);
  ip = MemFree (ip);
  return ip;
}
4046 
4047 
/* Maps a tRNA product name of the form "tRNA-Xxx" to its gene symbol
 * "trnX".  The "tRNA-" prefix is matched case-sensitively; the residue that
 * follows is matched case-insensitively and must be the whole remainder of
 * the string.  Returns a pointer to a constant string (do not free), or NULL
 * when the prefix is missing or the residue is not in the table.
 */
CharPtr s_tRNAGeneFromProduct (CharPtr product)
{
    static struct {
        CharPtr residue;
        CharPtr gene;
    } residue_to_gene[] = {
        { "Ala",   "trnA" },
        { "Asx",   "trnB" },
        { "Cys",   "trnC" },
        { "Asp",   "trnD" },
        { "Glu",   "trnE" },
        { "Phe",   "trnF" },
        { "Gly",   "trnG" },
        { "His",   "trnH" },
        { "Ile",   "trnI" },
        { "Xle",   "trnJ" },
        { "Lys",   "trnK" },
        { "Leu",   "trnL" },
        { "Met",   "trnM" },
        { "Asn",   "trnN" },
        { "Pyl",   "trnO" },
        { "Pro",   "trnP" },
        { "Gln",   "trnQ" },
        { "Arg",   "trnR" },
        { "Ser",   "trnS" },
        { "Thr",   "trnT" },
        { "Sec",   "trnU" },
        { "Val",   "trnV" },
        { "Trp",   "trnW" },
        { "OTHER", "trnX" },
        { "Tyr",   "trnY" },
        { "Glx",   "trnZ" }
    };
    CharPtr residue;
    Int4    idx, num_entries;

    if (StringNCmp (product, "tRNA-", 5) != 0) {
      return NULL;
    }
    residue = product + 5;

    num_entries = sizeof (residue_to_gene) / sizeof (residue_to_gene[0]);
    for (idx = 0; idx < num_entries; idx++) {
        if (StringICmp (residue, residue_to_gene[idx].residue) == 0) {
            return residue_to_gene[idx].gene;
        }
    }
    return NULL;
}
4112 
4113 
/* Finds where the current item of a comma/"and"-separated list ends.
 * Returns a pointer to whichever comes first in string: the first comma,
 * the first " and " (the pointer is to its leading space), or the
 * terminating NUL.  Returns NULL when string is NULL or has no text.
 */
static CharPtr FindNextIntergenicSpacerToken (CharPtr string)
{
  CharPtr boundary, comma, conjunction;

  if (StringHasNoText (string)) {
    return NULL;
  }

  /* start from the end of the string and pull the boundary in */
  boundary = string + StringLen (string);

  comma = StringChr (string, ',');
  if (comma != NULL && comma < boundary) {
    boundary = comma;
  }

  conjunction = StringSearch (string, " and ");
  if (conjunction != NULL && conjunction < boundary) {
    boundary = conjunction;
  }
  return boundary;
}
4133 
s_EndsWith(CharPtr str,CharPtr end)4134 static Boolean s_EndsWith (CharPtr str, CharPtr end)
4135 {
4136   Int4 len, len_end;
4137 
4138   if (str == NULL || end == NULL) {
4139     return FALSE;
4140   }
4141   len = StringLen (str);
4142   len_end = StringLen (end);
4143   if (len < len_end) {
4144     return FALSE;
4145   }
4146   if (StringCmp (str + len - len_end, end) == 0) {
4147     return TRUE;
4148   } else {
4149     return FALSE;
4150   }
4151 }
4152 
4153 
ParseGeneFromNoteForDefLine(CharPtr PNTR comment)4154 static CommentFeatPtr ParseGeneFromNoteForDefLine (CharPtr PNTR comment)
4155 {
4156   CommentFeatPtr tdp;
4157   CharPtr    product_start, product_end, gene_start, gene_end, cp;
4158   Int4       product_len, gene_len, phrase_len;
4159   CharPtr    next_token, phrase;
4160 
4161   if (comment == NULL || *comment == NULL)
4162   {
4163     return NULL;
4164   }
4165   /* spacers are not genes */
4166   if (StringNICmp (*comment, "intergenic", 10) == 0 || StringNICmp (*comment, "spacer", 6) == 0)
4167   {
4168     return NULL;
4169   }
4170 
4171   /* tRNA name must start with "tRNA-" and be followed by one uppercase letter and
4172    * two lowercase letters.
4173    */
4174   product_start = *comment;
4175   gene_start = product_start;
4176 
4177   next_token = FindNextIntergenicSpacerToken (product_start);
4178   phrase_len = next_token - product_start;
4179   phrase = (CharPtr) MemNew (sizeof (Char) * (phrase_len + 1));
4180   StringNCpy (phrase, product_start, phrase_len);
4181   phrase[phrase_len] = 0;
4182   product_start = phrase;
4183 
4184   if (StringISearch (phrase, "intergenic") != NULL
4185     || StringISearch (phrase, "spacer") != NULL) {
4186     /* spacers are not genes */
4187     phrase = MemFree (phrase);
4188     return NULL;
4189   }
4190 
4191   gene_start = StringChr (phrase, '(');
4192   if (gene_start == NULL) {
4193     gene_start = phrase + phrase_len;
4194   }
4195 
4196   product_end = gene_start;
4197 
4198   /* if tRNA, don't require gene name, but parse if present */
4199   while (isspace (*gene_start)) {
4200     gene_start++;
4201   }
4202   if (*gene_start == '(') {
4203     /* parse in gene name */
4204     gene_start++;
4205     gene_end = gene_start;
4206     while (*gene_end != 0 && *gene_end != ')') {
4207       gene_end++;
4208     }
4209     if (*gene_end == 0) {
4210       return NULL;
4211     }
4212     cp = gene_end + 1;
4213     while (*cp != 0 && isspace (*cp)) {
4214       cp++;
4215     }
4216   } else if (StringNICmp (gene_start, "intergenic", 10) == 0 || StringNICmp (gene_start, "spacer", 6) == 0) {
4217     /* spacers are not genes */
4218     phrase = MemFree (phrase);
4219     return NULL;
4220   } else if (StringNCmp (product_start, "tRNA", 4) != 0) {
4221     /* does product end with gene? */
4222     if (s_EndsWith(phrase, " gene")) {
4223       *(phrase + StringLen (phrase) - 5) = 0;
4224       cp = gene_start;
4225       gene_start = NULL;
4226     } else if (s_EndsWith(phrase, " genes")) {
4227       *(phrase + StringLen (phrase) - 5) = 0;
4228       cp = gene_start;
4229       gene_start = NULL;
4230     } else {
4231       phrase = MemFree (phrase);
4232       return NULL;
4233     }
4234   } else {
4235     cp = gene_start;
4236     gene_start = NULL;
4237   }
4238 
4239   /* skip over gene or genes if present */
4240   if (StringNCmp (cp, "genes", 5) == 0) {
4241     cp +=5;
4242     while (*cp != 0 && isspace (*cp)) {
4243       cp++;
4244     }
4245   } else if (StringNCmp (cp, "gene", 4) == 0) {
4246     cp += 4;
4247     while (*cp != 0 && isspace (*cp)) {
4248       cp++;
4249     }
4250   }
4251 
4252   tdp = (CommentFeatPtr) MemNew (sizeof (CommentFeatData));
4253   if (tdp == NULL)
4254   {
4255     phrase = MemFree (phrase);
4256     return NULL;
4257   }
4258   product_len = product_end - product_start;
4259   tdp->product_name = (CharPtr) MemNew (sizeof (Char) * (1 + product_len));
4260   StringNCpy (tdp->product_name, product_start, product_len);
4261   tdp->product_name[product_len] = 0;
4262 
4263   if (gene_start != NULL) {
4264     gene_len = gene_end - gene_start;
4265     tdp->gene_name = (CharPtr) MemNew (sizeof (Char) * (1 + gene_len));
4266     StringNCpy (tdp->gene_name, gene_start, gene_len);
4267     tdp->gene_name[gene_len] = 0;
4268   }
4269 
4270   *comment += (cp - phrase);
4271   phrase = MemFree (phrase);
4272   return tdp;
4273 }
4274 
4275 
ParseIntergenicSpacerFromNoteForDef(CharPtr PNTR comment)4276 static IntergenicSpacerDefPtr ParseIntergenicSpacerFromNoteForDef (CharPtr PNTR comment)
4277 {
4278   IntergenicSpacerDefPtr idp;
4279   CharPtr                first_gene_start, dash, second_gene_start, second_gene_end, cp;
4280   Int4                   first_gene_len, second_gene_len;
4281 
4282   if (comment == NULL || *comment == NULL)
4283   {
4284     return NULL;
4285   }
4286 
4287   /* description must start with "trn" and be followed by one uppercase letter, followed
4288    * by a dash, followed by "trn", followed by one uppercase letter, followed by whitespace,
4289    * followed by the phrase "intergenic spacer".
4290    */
4291   first_gene_start = *comment;
4292   dash = first_gene_start;
4293   while (*dash != 0 && !isspace (*dash) && *dash != '-') {
4294     dash++;
4295   }
4296   if (*dash != '-') {
4297     return NULL;
4298   }
4299   second_gene_start = dash + 1;
4300   second_gene_end = second_gene_start;
4301   while (*second_gene_end != 0 && !isspace (*second_gene_end)) {
4302     second_gene_end ++;
4303   }
4304   if (!isspace (*second_gene_end)) {
4305     return NULL;
4306   }
4307   cp = second_gene_end;
4308   while (isspace (*cp)) {
4309     cp++;
4310   }
4311   if (StringNCmp (cp, "intergenic spacer", 17) != 0) {
4312     return NULL;
4313   }
4314 
4315   idp = (IntergenicSpacerDefPtr) MemNew (sizeof (IntergenicSpacerDefData));
4316   if (idp == NULL)
4317   {
4318     return NULL;
4319   }
4320 
4321   first_gene_len = dash - first_gene_start;
4322   idp->first_gene = (CharPtr) MemNew (sizeof (Char) * (1 + first_gene_len));
4323   StringNCpy (idp->first_gene, first_gene_start, first_gene_len);
4324   idp->first_gene [first_gene_len] = 0;
4325 
4326   second_gene_len = second_gene_end - second_gene_start;
4327   idp->second_gene = (CharPtr) MemNew (sizeof (Char) * (1 + second_gene_len));
4328   StringNCpy (idp->second_gene, second_gene_start, second_gene_len);
4329   idp->second_gene [second_gene_len] = 0;
4330 
4331   *comment = cp + 17;
4332   return idp;
4333 }
4334 
4335 /* This creates a feature clause from a tRNADef structure. */
4336 static FeatureClausePtr
FeatureClauseFromParsedComment(CommentFeatPtr tdp,SeqFeatPtr misc_feat,Boolean is_partial,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)4337 FeatureClauseFromParsedComment
4338 (CommentFeatPtr tdp,
4339  SeqFeatPtr misc_feat,
4340  Boolean    is_partial,
4341  BioseqPtr  bsp,
4342  DeflineFeatureRequestListPtr rp)
4343 {
4344   FeatureClausePtr fcp;
4345   CharPtr gene_fmt = "%s (%s)";
4346 
4347   if (tdp == NULL)
4348   {
4349     return NULL;
4350   }
4351 
4352   fcp = NewFeatureClause ( misc_feat, bsp, rp);
4353   if (fcp != NULL)
4354   {
4355     fcp->feature_label_data.is_typeword_first = FALSE;
4356     if (StringCmp(tdp->product_name, "control region") == 0 || StringCmp(tdp->product_name, "D-loop") == 0) {
4357       fcp->feature_label_data.typeword = StringSave(tdp->product_name);
4358       fcp->feature_label_data.description = StringSave("");
4359     } else {
4360       fcp->feature_label_data.typeword = StringSave ("gene");
4361       if (tdp->gene_name == NULL) {
4362         fcp->feature_label_data.description = StringSave (tdp->product_name);
4363       } else {
4364         fcp->feature_label_data.description = (CharPtr) MemNew (sizeof (Char) * (StringLen (gene_fmt)
4365                                                                                + StringLen (tdp->gene_name)
4366                                                                                + StringLen (tdp->product_name)));
4367         if (fcp->feature_label_data.description != NULL)
4368         {
4369           sprintf (fcp->feature_label_data.description, gene_fmt,
4370                    tdp->product_name, tdp->gene_name);
4371         }
4372       }
4373     }
4374     if (is_partial)
4375     {
4376       fcp->interval = StringSave ("partial sequence");
4377     }
4378     else
4379     {
4380       fcp->interval = StringSave ("complete sequence");
4381     }
4382   }
4383   return fcp;
4384 }
4385 
4386 
/* Skips the separator between two items of a list: an optional comma,
 * whitespace, an optional word "and", and any whitespace after it.
 * Returns a pointer to the first character of the next item (or to the
 * terminating NUL).  A NULL or empty string is returned unchanged.
 */
static CharPtr AdvancePastSeparators (CharPtr cp)
{
  /* test for the NUL terminator, not the digit character '0' */
  if (cp == NULL || *cp == '\0') return cp;

  if (*cp == ',')
  {
    cp++;
  }
  while (isspace ((unsigned char) *cp))
  {
    cp++;
  }
  /* only consume "and" when it is a whole word, so that an item which merely
   * begins with those letters is not truncated
   */
  if (StringNCmp (cp, "and", 3) == 0
      && (cp[3] == '\0' || isspace ((unsigned char) cp[3])))
  {
    cp += 3;
  }
  while (isspace ((unsigned char) *cp))
  {
    cp++;
  }
  return cp;
}
4409 
/* ValNode choice values used to tag the items of a parsed comment list:
 * a gene node holds a CommentFeatPtr, a spacer node holds an
 * IntergenicSpacerDefPtr.
 */
#define MISCFEAT_TRNA_GENE   1
#define MISCFEAT_TRNA_SPACER 2
4412 
/* Frees a list built by GettRNAGenesAndSpacers.  Each node's payload is
 * released according to its choice tag (gene or spacer); payloads of nodes
 * with any other tag are left alone.  Always returns NULL.
 */
static ValNodePtr FreetRNAGenesAndSpacersList (ValNodePtr list)
{
  ValNodePtr following;

  for (; list != NULL; list = following) {
    following = list->next;
    switch (list->choice) {
      case MISCFEAT_TRNA_GENE:
        list->data.ptrvalue = CommentFeatFree (list->data.ptrvalue);
        break;
      case MISCFEAT_TRNA_SPACER:
        list->data.ptrvalue = IntergenicSpacerDefFree (list->data.ptrvalue);
        break;
      default:
        break;
    }
    /* detach first so ValNodeFree only sees this single node */
    list->next = NULL;
    ValNodeFree (list);
  }
  return list;
}
4430 
4431 
GettRNAGenesAndSpacers(CharPtr str)4432 static ValNodePtr GettRNAGenesAndSpacers (CharPtr str)
4433 {
4434   ValNodePtr list = NULL;
4435   CharPtr    cp;
4436   CommentFeatPtr gene, last_gene = NULL;
4437   IntergenicSpacerDefPtr spacer, last_spacer = NULL;
4438   Boolean                none_left = FALSE, names_correct = TRUE, alternating = TRUE;
4439   Int4                   last_item_type = 0;
4440 
4441   if (StringNICmp (str, "contains ", 9) == 0) {
4442     cp = str + 9;
4443   } else if (StringNICmp (str, "may contain ", 12) == 0) {
4444     cp = str + 12;
4445   } else {
4446     return NULL;
4447   }
4448 
4449   while (isspace (*cp))
4450   {
4451     cp ++;
4452   }
4453 
4454   while (!none_left && *cp != 0 && *cp != ';' && alternating && names_correct) {
4455     none_left = TRUE;
4456     gene = ParseGeneFromNoteForDefLine (&cp);
4457     if (gene != NULL) {
4458       /* if previous item was spacer, spacer names and gene names must agree */
4459       if (last_item_type == MISCFEAT_TRNA_SPACER && last_spacer != NULL) {
4460         if (gene->gene_name == NULL) {
4461           if (StringCmp (gene->product_name, last_spacer->second_gene) != 0
4462               && StringCmp (last_spacer->second_gene, s_tRNAGeneFromProduct (gene->product_name)) != 0) {
4463             names_correct = FALSE;
4464           }
4465         } else if (StringCmp (last_spacer->second_gene, gene->gene_name) != 0) {
4466           names_correct = FALSE;
4467         }
4468       }
4469       ValNodeAddPointer (&list, MISCFEAT_TRNA_GENE, gene);
4470       cp = AdvancePastSeparators (cp);
4471       none_left = FALSE;
4472       last_item_type = MISCFEAT_TRNA_GENE;
4473       last_gene = gene;
4474     }
4475 
4476     spacer = ParseIntergenicSpacerFromNoteForDef (&cp);
4477     if (spacer != NULL) {
4478       /* must alternate between genes and spacers */
4479       if (last_item_type == MISCFEAT_TRNA_SPACER) {
4480         alternating = FALSE;
4481       }
4482       /* spacer names and gene names must agree */
4483       if (last_gene != NULL) {
4484         if (last_gene->gene_name == NULL) {
4485           if (StringCmp (last_gene->product_name, spacer->first_gene) != 0
4486               && StringCmp (s_tRNAGeneFromProduct (last_gene->product_name), spacer->first_gene) != 0) {
4487             names_correct = FALSE;
4488           }
4489         } else if (StringCmp (last_gene->gene_name, spacer->first_gene) != 0) {
4490           names_correct = FALSE;
4491         }
4492       }
4493       ValNodeAddPointer (&list, MISCFEAT_TRNA_SPACER, spacer);
4494       cp = AdvancePastSeparators (cp);
4495       none_left = FALSE;
4496       last_item_type = MISCFEAT_TRNA_SPACER;
4497       last_spacer = spacer;
4498     }
4499   }
4500   if ((*cp != 0 && *cp != ';') || !alternating || !names_correct) {
4501     list = FreetRNAGenesAndSpacersList (list);
4502   }
4503   return list;
4504 }
4505 
4506 
4507 /* This function produces a feature clause list that should replace the original
4508  * single clause for a misc_feat that contains a note with one or more tRNAs and
4509  * an intergenic spacer.
4510  */
4511 static ValNodePtr
ParsetRNAIntergenicSpacerElements(SeqFeatPtr misc_feat,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)4512 ParsetRNAIntergenicSpacerElements
4513 (SeqFeatPtr misc_feat,
4514 BioseqPtr   bsp,
4515 DeflineFeatureRequestListPtr rp)
4516 {
4517   FeatureClausePtr fcp;
4518   ValNodePtr head = NULL, vnp, list;
4519   Boolean partial5, partial3;
4520   IntergenicSpacerDefPtr spacer = NULL;
4521   Boolean                current_is_partial;
4522 
4523   if (misc_feat == NULL
4524       || StringHasNoText (misc_feat->comment))
4525   {
4526     return NULL;
4527   }
4528 
4529   list = GettRNAGenesAndSpacers (misc_feat->comment);
4530   if (list != NULL) {
4531     if (StringNICmp (misc_feat->comment, "may contain ", 12) == 0) {
4532       fcp = NewFeatureClause (misc_feat, bsp, rp);
4533       fcp->feature_label_data.description = StringSave (misc_feat->comment + 12);
4534       fcp->interval = StringSave ("region");
4535       ValNodeAddPointer (&head, DEFLINE_CLAUSEPLUS, fcp);
4536     } else {
4537       CheckSeqLocForPartial (misc_feat->location, &partial5, &partial3);
4538       for (vnp = list; vnp != NULL; vnp = vnp->next) {
4539         current_is_partial = (partial5 && vnp == list) || (partial3 && vnp->next == NULL);
4540         if (vnp->data.ptrvalue == NULL) continue;
4541         if (vnp->choice == MISCFEAT_TRNA_GENE) {
4542           fcp = FeatureClauseFromParsedComment (vnp->data.ptrvalue, misc_feat, current_is_partial, bsp, rp);
4543           if (fcp != NULL) {
4544             ValNodeAddPointer (&head, DEFLINE_CLAUSEPLUS, fcp);
4545           }
4546         } else if (vnp->choice == MISCFEAT_TRNA_SPACER) {
4547           spacer = (IntergenicSpacerDefPtr) vnp->data.ptrvalue;
4548           fcp = NewFeatureClause ( misc_feat, bsp, rp);
4549           if (fcp != NULL)
4550           {
4551             fcp->feature_label_data.is_typeword_first = FALSE;
4552             fcp->feature_label_data.typeword = StringSave ("intergenic spacer");
4553             fcp->feature_label_data.description = (CharPtr) MemNew (10 * sizeof (Char));
4554             if (fcp->feature_label_data.description != NULL)
4555             {
4556               sprintf (fcp->feature_label_data.description, "%s-%s",
4557                       spacer->first_gene, spacer->second_gene);
4558             }
4559             if (current_is_partial)
4560             {
4561               fcp->interval = StringSave ("partial sequence");
4562             }
4563             else
4564             {
4565               fcp->interval = StringSave ("complete sequence");
4566             }
4567             ValNodeAddPointer (&head, DEFLINE_CLAUSEPLUS, fcp);
4568           }
4569         }
4570       }
4571     }
4572     list = FreetRNAGenesAndSpacersList (list);
4573   }
4574   return head;
4575 }
4576 
IsRegulatory(SeqFeatPtr sfp)4577 static Boolean IsRegulatory(SeqFeatPtr sfp)
4578 {
4579   ImpFeatPtr imp;
4580 
4581   if (sfp == NULL ||
4582       sfp->data.choice != SEQFEAT_IMP ||
4583       (imp = (ImpFeatPtr)(sfp->data.value.ptrvalue)) == NULL ||
4584       StringCmp(imp->key, "regulatory") != 0) {
4585     return FALSE;
4586   } else {
4587     return TRUE;
4588   }
4589 }
4590 
4591 
GetRegulatoryClass(SeqFeatPtr sfp)4592 static CharPtr GetRegulatoryClass(SeqFeatPtr sfp)
4593 {
4594   GBQualPtr gbqual;
4595 
4596   if (sfp == NULL || !IsRegulatory(sfp)) {
4597     return NULL;
4598   }
4599   for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next) {
4600     if (StringICmp(gbqual->qual, "regulatory_class") == 0) {
4601       return gbqual->val;
4602     }
4603   }
4604   return NULL;
4605 }
4606 
4607 
IsSatelliteSequence(SeqFeatPtr sfp)4608 static Boolean LIBCALLBACK IsSatelliteSequence (
4609   SeqFeatPtr sfp
4610 )
4611 {
4612   GBQualPtr gbqual;
4613   if (sfp == NULL
4614     || sfp->idx.subtype != FEATDEF_repeat_region)
4615   {
4616     return FALSE;
4617   }
4618   for (gbqual = sfp->qual; gbqual != NULL; gbqual = gbqual->next)
4619   {
4620     if (StringCmp (gbqual->qual, "satellite") == 0)
4621     {
4622       return TRUE;
4623     }
4624   }
4625   return FALSE;
4626 }
4627 
IsPromoter(SeqFeatPtr sfp)4628 static Boolean LIBCALLBACK IsPromoter (
4629   SeqFeatPtr sfp
4630 )
4631 {
4632   if (sfp == NULL) {
4633     return FALSE;
4634   } else if (sfp->idx.subtype == FEATDEF_promoter) {
4635     return TRUE;
4636   } else if (StringCmp (GetRegulatoryClass(sfp), "promoter") == 0) {
4637     return TRUE;
4638   }
4639   return FALSE;
4640 }
4641 
IsEndogenousVirusSourceFeature(SeqFeatPtr sfp)4642 static Boolean LIBCALLBACK IsEndogenousVirusSourceFeature (
4643   SeqFeatPtr sfp
4644 )
4645 {
4646   BioSourcePtr biop;
4647   SubSourcePtr  ssp;
4648 
4649   if (sfp == NULL || sfp->idx.subtype != FEATDEF_BIOSRC) return FALSE;
4650   if ((biop = sfp->data.value.ptrvalue) == NULL) return FALSE;
4651   ssp = biop->subtype;
4652   while (ssp != NULL && ssp->subtype != SUBSRC_endogenous_virus_name)
4653   {
4654     ssp = ssp->next;
4655   }
4656   if (ssp != NULL) return TRUE;
4657   return FALSE;
4658 }
4659 
GetEndogenousVirusSourceFeatureDescription(SeqFeatPtr sfp)4660 static CharPtr GetEndogenousVirusSourceFeatureDescription (
4661   SeqFeatPtr sfp
4662 )
4663 {
4664   BioSourcePtr biop;
4665   SubSourcePtr  ssp;
4666 
4667   if (sfp == NULL || sfp->idx.subtype != FEATDEF_BIOSRC) return NULL;
4668   if ((biop = sfp->data.value.ptrvalue) == NULL) return NULL;
4669   ssp = biop->subtype;
4670   while (ssp != NULL && ssp->subtype != SUBSRC_endogenous_virus_name)
4671   {
4672     ssp = ssp->next;
4673   }
4674   if (ssp != NULL && ssp->name != NULL)
4675   {
4676     return StringSave (ssp->name);
4677   }
4678   return NULL;
4679 }
4680 
4681 
4682 static CharPtr noncoding_feature_keywords[] = {
4683   "similar to ",
4684   "contains "
4685 };
4686 
/* Searches comment for one of the noncoding_feature_keywords and returns a
 * pointer to the text immediately after the keyword.  Occurrences that are
 * directly followed by "GenBank Accession Number" are skipped.  Keywords are
 * tried in table order, so a later occurrence of an earlier keyword wins over
 * an earlier occurrence of a later keyword.  Returns NULL when comment is
 * NULL or no acceptable occurrence exists.
 */
static CharPtr find_noncoding_feature_keyword (
  CharPtr comment
)
{
  CharPtr excluded = "GenBank Accession Number";
  CharPtr hit, scan_from;
  Int4    idx, keyword_count, keyword_len, excluded_len;

  if (comment == NULL) return NULL;

  keyword_count = sizeof (noncoding_feature_keywords) / sizeof (CharPtr);
  excluded_len = StringLen (excluded);

  for (idx = 0; idx < keyword_count; idx++)
  {
    keyword_len = StringLen (noncoding_feature_keywords [idx]);
    for (scan_from = comment;
         (hit = StringStr (scan_from, noncoding_feature_keywords [idx])) != NULL;
         scan_from = hit + 1)
    {
      if (StringNCmp (hit + keyword_len, excluded, excluded_len) != 0)
      {
        return hit + keyword_len;
      }
      /* accession reference: keep looking further along the comment */
    }
  }
  return NULL;
}
4716 
4717 
4718 // returns ValNode list of CommentFeatPtr
ParsetRNAAndOtherElement(CharPtr str)4719 static ValNodePtr ParsetRNAAndOtherElement (CharPtr str)
4720 {
4721   CharPtr cp, other;
4722   ValNodePtr list = NULL;
4723   CommentFeatPtr cf;
4724 
4725   cp = str;
4726   if (StringNCmp(cp, "contains ", 9) == 0) {
4727     cp += 9;
4728   }
4729 
4730   other = StringSearch (cp, " and ");
4731   if (other == NULL) {
4732     return list;
4733   }
4734 
4735   while (cp < other) {
4736       cf = ParseGeneFromNoteForDefLine (&cp);
4737       if (cf == NULL) {
4738           list = CommentFeatListFree(list);
4739           return list;
4740       } else {
4741           ValNodeAddPointer (&list, MISCFEAT_TRNA_GENE, cf);
4742           while (*cp == ',' || isspace(*cp)) {
4743             cp ++;
4744           }
4745       }
4746   }
4747 
4748   other += 5;
4749 
4750   if (StringCmp(other, "control region") == 0 || StringCmp (other, "D-loop") == 0) {
4751     cf = (CommentFeatPtr) MemNew (sizeof (CommentFeatData));
4752     cf->product_name = StringSave(other);
4753     ValNodeAddPointer (&list, MISCFEAT_TRNA_GENE, cf);
4754   } else {
4755       list = CommentFeatListFree(list);
4756   }
4757   return list;
4758 }
4759 
4760 
IsTrnaPlusOther(SeqFeatPtr sfp)4761 static Boolean LIBCALLBACK IsTrnaPlusOther (
4762   SeqFeatPtr sfp
4763 )
4764 {
4765   ValNodePtr list;
4766   Boolean rval = FALSE;
4767 
4768   if (sfp == NULL ||
4769       sfp->idx.subtype != FEATDEF_misc_feature ||
4770       sfp->comment == NULL) {
4771     rval = FALSE;
4772   } else {
4773     list = ParsetRNAAndOtherElement(sfp->comment);
4774     if (list != NULL) {
4775       rval = TRUE;
4776     }
4777     list = CommentFeatListFree (list);
4778   }
4779   return rval;
4780 }
4781 
4782 
IsNoncodingProductFeat(SeqFeatPtr sfp)4783 static Boolean LIBCALLBACK IsNoncodingProductFeat (
4784   SeqFeatPtr sfp
4785 )
4786 {
4787   if ( sfp == NULL
4788     || sfp->idx.subtype != FEATDEF_misc_feature
4789     || sfp->comment == NULL
4790     || StringStr (sfp->comment, "intergenic") != NULL
4791     || IsParsableList (sfp)
4792     || IsTrnaPlusOther (sfp)
4793     || (find_noncoding_feature_keyword (sfp->comment) == NULL
4794       && (StringStr (sfp->comment, "nonfunctional ") == NULL
4795         || StringStr (sfp->comment, " due to ") == NULL)))
4796   {
4797     return FALSE;
4798   }
4799 
4800 
4801   return TRUE;
4802 }
4803 
GetNoncodingProductFeatProduct(SeqFeatPtr sfp)4804 static CharPtr GetNoncodingProductFeatProduct (
4805   SeqFeatPtr sfp
4806 )
4807 {
4808   CharPtr productname;
4809   Int4    namelen, compare_len;
4810   CharPtr name_start, sep;
4811 
4812   if (sfp == NULL || sfp->comment == NULL) return NULL;
4813 
4814   if ((name_start = StringStr (sfp->comment, "nonfunctional ")) != NULL
4815     && (sep = StringStr (sfp->comment, " due to ")) != NULL
4816     && sep > name_start)
4817   {
4818     productname = StringSave (name_start);
4819     productname [ sep - name_start] = 0;
4820     return productname;
4821   }
4822 
4823   name_start = find_noncoding_feature_keyword (sfp->comment);
4824   if (name_start == NULL) return NULL;
4825 
4826   sep = StringStr (name_start, ";");
4827   if (sep == NULL)
4828   {
4829     namelen = StringLen (name_start);
4830   }
4831   else
4832   {
4833     namelen = sep - name_start;
4834   }
4835 
4836   productname = MemNew (namelen + 6);
4837   if (productname == NULL) return NULL;
4838 
4839   StringNCpy (productname, name_start, namelen);
4840   productname [namelen] = 0;
4841 
4842   /* remove sequence from end of name if present */
4843   compare_len = StringLen (" sequence");
4844   if (StringCmp (productname + namelen - compare_len, " sequence") == 0)
4845   {
4846     productname [ namelen - compare_len] = 0;
4847     namelen = StringLen (productname);
4848   }
4849   /* add "-like" if not present */
4850   compare_len = StringLen ("-like");
4851   if (StringCmp (productname + namelen - compare_len, "-like") != 0)
4852   {
4853     StringCat (productname, "-like");
4854     namelen = StringLen (productname);
4855   }
4856   return productname;
4857 }
4858 
IsMiscFeat(SeqFeatPtr sfp)4859 static Boolean LIBCALLBACK IsMiscFeat (
4860   SeqFeatPtr sfp
4861 )
4862 {
4863   if ( sfp == NULL
4864     || sfp->idx.subtype != FEATDEF_misc_feature
4865     || sfp->comment == NULL)
4866   {
4867     return FALSE;
4868   }
4869 
4870   return TRUE;
4871 }
4872 
4873 
IsSatellite(SeqFeatPtr sfp)4874 static Boolean IsSatellite (SeqFeatPtr sfp)
4875 {
4876   GBQualPtr gbq;
4877   Boolean rval = FALSE;
4878 
4879   if ( sfp == NULL
4880     || sfp->idx.subtype != FEATDEF_repeat_region) {
4881     return FALSE;
4882   }
4883   for (gbq = sfp->qual; gbq != NULL && !rval; gbq = gbq->next) {
4884     if (StringICmp (gbq->qual, "satellite") == 0) {
4885       rval = TRUE;
4886     }
4887   }
4888   return rval;
4889 }
4890 
4891 // use comment or rpt_family
DoesRepeatRegionHaveLabel(SeqFeatPtr sfp)4892 static Boolean DoesRepeatRegionHaveLabel(SeqFeatPtr sfp)
4893 {
4894     GBQualPtr g;
4895 
4896     if (sfp == NULL || sfp->idx.subtype != FEATDEF_repeat_region) {
4897         return FALSE;
4898     }
4899     if (!StringHasNoText(sfp->comment)) {
4900         return TRUE;
4901     }
4902 
4903     for (g = sfp->qual; g != NULL; g = g->next) {
4904         if (StringICmp(g->qual, "rpt_family") == 0) {
4905             return TRUE;
4906         }
4907     }
4908     return FALSE;
4909 }
4910 
IsRepeatRegion(SeqFeatPtr sfp)4911 static Boolean LIBCALLBACK IsRepeatRegion (
4912   SeqFeatPtr sfp
4913 )
4914 {
4915   if ( sfp == NULL
4916     || sfp->idx.subtype != FEATDEF_repeat_region
4917     || !DoesRepeatRegionHaveLabel(sfp)
4918     || IsSatellite(sfp))
4919   {
4920     return FALSE;
4921   }
4922 
4923   return TRUE;
4924 }
4925 
/* Returns a newly allocated copy of str with a trailing "repeat region"
 * removed, if present.  Only that exact trailing text is dropped; any
 * characters in front of it (including whitespace) are kept.
 *
 * str: label text; may be NULL.
 *
 * Returns NULL when str is NULL or when the copy cannot be allocated.
 * The caller owns the returned string.
 */
static CharPtr RepeatRegionLabelFromString(CharPtr str)
{
    CharPtr extra = "repeat region";
    CharPtr rval = NULL;
    Int4 len, extra_len;

    if (str == NULL) {
        return NULL;
    }
    len = StringLen(str);
    extra_len = StringLen(extra);

    if (len < extra_len || StringCmp(str + len - extra_len, extra) != 0) {
        /* nothing to strip */
        rval = StringSave(str);
    } else {
        rval = (CharPtr)MemNew(sizeof(Char) * (1 + len - extra_len));
        /* do not write through a failed allocation */
        if (rval != NULL) {
            StringNCpy(rval, str, len - extra_len);
            rval[len - extra_len] = 0;
        }
    }
    return rval;
}
4947 
GetRepeatRegionLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)4948 static void LIBCALLBACK GetRepeatRegionLabel (
4949   ValNodePtr      featlist,
4950   BioseqPtr       bsp,
4951   Uint1           biomol,
4952   FeatureLabelPtr flp
4953 )
4954 {
4955   SeqFeatPtr main_feat;
4956   Boolean    found = FALSE;
4957   GBQualPtr  g;
4958 
4959   flp->description = NULL;
4960   flp->typeword = StringSave("repeat region");
4961   flp->pluralizable = FALSE;
4962   flp->is_typeword_first = FALSE;
4963 
4964   if (featlist == NULL) return;
4965   main_feat = featlist->data.ptrvalue;
4966   if (main_feat == NULL) return;
4967 
4968   for (g = main_feat->qual; g != NULL; g = g->next) {
4969       if (StringICmp(g->qual, "rpt_family") == 0) {
4970           flp->description = RepeatRegionLabelFromString(g->val);
4971           found = TRUE;
4972       }
4973   }
4974 
4975   if (!found && !StringHasNoText(main_feat->comment)) {
4976       flp->description = RepeatRegionLabelFromString(main_feat->comment);
4977   }
4978 }
4979 
4980 
IsOperon(SeqFeatPtr sfp)4981 static Boolean LIBCALLBACK IsOperon (
4982   SeqFeatPtr sfp
4983 )
4984 {
4985   if (sfp == NULL
4986     || sfp->idx.subtype != FEATDEF_operon)
4987   {
4988     return FALSE;
4989   }
4990 
4991   return TRUE;
4992 }
4993 
IsRecognizedFeature(SeqFeatPtr sfp)4994 static Boolean IsRecognizedFeature (
4995   SeqFeatPtr sfp
4996 )
4997 {
4998   if (IsGene (sfp)
4999     || IsCDS (sfp)
5000     || IsExon (sfp)
5001     || IsIntron (sfp)
5002     || IsLTR (sfp)
5003     || IsrRNA (sfp)
5004     || IstRNA (sfp)
5005     || IsmRNA (sfp)
5006     || IsMiscRNA (sfp)
5007     || IsncRNA (sfp)
5008     || IsPrecursorRNA (sfp)
5009     || Is3UTR (sfp)
5010     || Is5UTR (sfp)
5011     || IsMobileElement (sfp)
5012     || IsEndogenousVirusSequence (sfp)
5013     || IsEndogenousVirusSourceFeature (sfp)
5014     || IsDloop (sfp)
5015     || IsSatelliteSequence (sfp)
5016     || IsControlRegion (sfp)
5017     || IsIntergenicSpacer (sfp)
5018     || IsGeneCluster (sfp)
5019     || IsNoncodingProductFeat (sfp)
5020     || IsPromoter (sfp)
5021     || IsMiscFeat (sfp)
5022     || IsRepeatRegion (sfp)
5023     || IsOperon (sfp))
5024   {
5025     return TRUE;
5026   }
5027   else
5028   {
5029     return FALSE;
5030   }
5031 }
5032 
5033 /* The following section of code contains functions for dealing with lists of
5034  * clauses.
5035  */
5036 
5037 /* The functions for freeing the memory associated with lists of clauses
5038  * are recursive.
5039  */
5040 static void FreeListElement (ValNodePtr element);
5041 
5042 /* This function simply frees the ValNodePtr, since there is no extra
5043  * memory associated with a DEFLINE_FEATLIST item - the sfp that is
5044  * pointed to by data.ptrvalue came from the sequence indexing functions
5045  * and should under no circumstances be freed.
5046  */
FreeFeatlist(ValNodePtr featlist)5047 static void FreeFeatlist (
5048   ValNodePtr featlist
5049 )
5050 {
5051 
5052   if (featlist == NULL) return;
5053   ValNodeFree (featlist);
5054 }
5055 
5056 /* This function frees the memory associated with a FeatureClause, including
5057  * the memory associated with any subclauses.
5058  */
FreeClausePlusData(FeatureClausePtr fcp)5059 static void FreeClausePlusData (
5060   FeatureClausePtr fcp
5061 )
5062 {
5063   if (fcp->interval != NULL)
5064   {
5065     MemFree (fcp->interval);
5066     fcp->interval = NULL;
5067   }
5068   if (fcp->allelename != NULL)
5069   {
5070     MemFree (fcp->allelename);
5071     fcp->allelename = NULL;
5072   }
5073   if (fcp->feature_label_data.typeword != NULL)
5074   {
5075     MemFree (fcp->feature_label_data.typeword);
5076     fcp->feature_label_data.typeword = NULL;
5077   }
5078   if (fcp->feature_label_data.description != NULL)
5079   {
5080     MemFree (fcp->feature_label_data.description);
5081     fcp->feature_label_data.description = NULL;
5082   }
5083   if (fcp->feature_label_data.productname != NULL)
5084   {
5085     MemFree (fcp->feature_label_data.productname);
5086     fcp->feature_label_data.productname = NULL;
5087   }
5088   if (fcp->featlist != NULL)
5089   {
5090     FreeListElement (fcp->featlist);
5091     fcp->featlist = NULL;
5092   }
5093   if (fcp->slp != NULL)
5094   {
5095     SeqLocFree (fcp->slp);
5096   }
5097 }
5098 
5099 /* This function frees the data associated with the FeatureClause
5100  * and then frees the ValNode.
5101  */
FreeClausePlus(ValNodePtr clauseplus)5102 static void FreeClausePlus (
5103   ValNodePtr clauseplus
5104 )
5105 {
5106   FeatureClausePtr data_struct;
5107 
5108   if (clauseplus == NULL) return;
5109   data_struct = (FeatureClausePtr) clauseplus->data.ptrvalue;
5110   if (data_struct != NULL)
5111   {
5112     FreeClausePlusData (data_struct);
5113     MemFree (data_struct);
5114     clauseplus->data.ptrvalue = NULL;
5115   }
5116   ValNodeFree (clauseplus);
5117 }
5118 
5119 /* This function frees a list of DEFLINE_FEATLIST, DEFLINE_REMOVEFEAT,
5120  * and DEFLINE_CLAUSEPLUS items, starting with the last item in the list.
5121  * It recursively frees memory associated with subclauses.
5122  */
FreeListElement(ValNodePtr element)5123 static void FreeListElement (
5124   ValNodePtr element
5125 )
5126 {
5127   if (element == NULL) return;
5128 
5129   FreeListElement (element->next);
5130   element->next = NULL;
5131   if (element->choice == DEFLINE_FEATLIST
5132     || element->choice == DEFLINE_REMOVEFEAT)
5133   {
5134     FreeFeatlist (element);
5135   }
5136   else if (element->choice == DEFLINE_CLAUSEPLUS)
5137   {
5138     FreeClausePlus (element);
5139   }
5140 }
5141 
5142 /* This function excises from the list pointed to by head all of the clauses
5143  * with the delete_me flag set to TRUE and all of the ValNodes with a choice
5144  * of DEFLINE_REMOVEFEAT.
5145  */
DeleteFeatureClauses(ValNodePtr PNTR head)5146 static void DeleteFeatureClauses (
5147   ValNodePtr PNTR head
5148 )
5149 {
5150   ValNodePtr vnp, prev;
5151   FeatureClausePtr fcp;
5152   Boolean          delete_this_one;
5153 
5154   if (head == NULL) return;
5155 
5156   prev = NULL;
5157   vnp = *head;
5158   while (vnp != NULL)
5159   {
5160     delete_this_one = FALSE;
5161 
5162     if (vnp->choice == DEFLINE_CLAUSEPLUS)
5163     {
5164       fcp = vnp->data.ptrvalue;
5165       if (fcp == NULL || fcp->delete_me || fcp->featlist == NULL)
5166       {
5167         delete_this_one = TRUE;
5168       }
5169       else
5170       {
5171         DeleteFeatureClauses (&fcp->featlist);
5172         if (fcp->featlist == NULL) delete_this_one = TRUE;
5173       }
5174     }
5175     else if (vnp->choice == DEFLINE_REMOVEFEAT)
5176     {
5177       delete_this_one = TRUE;
5178     }
5179 
5180     if (delete_this_one)
5181     {
5182       if (prev == NULL)
5183       {
5184         *head = vnp->next;
5185         vnp->next = NULL;
5186         FreeListElement (vnp);
5187         if (*head == NULL) return;
5188         vnp = *head;
5189       }
5190       else
5191       {
5192         prev->next = vnp->next;
5193         vnp->next = NULL;
5194         FreeListElement (vnp);
5195         vnp = prev->next;
5196       }
5197     }
5198     else
5199     {
5200       prev = vnp;
5201       vnp = vnp->next;
5202     }
5203   }
5204 }
5205 
5206 /* This function counts the number of features in the feature list that
5207  * satisfy the itemmatch function (or all of them, if itemmatch is NULL).
5208  * If recurse_past_found_item, the function will not count features in
5209  * subclauses of features that satisfy itemmatch.
5210  */
CountFeatures(ValNodePtr clause_list,matchFunction itemmatch,Boolean recurse_past_found_item)5211 static Int4 CountFeatures (
5212   ValNodePtr clause_list,
5213   matchFunction  itemmatch,
5214   Boolean    recurse_past_found_item
5215 )
5216 {
5217   ValNodePtr       vnp;
5218   Int4             num_features;
5219   FeatureClausePtr fcp;
5220 
5221   num_features = 0;
5222   for (vnp = clause_list;
5223        vnp != NULL;
5224        vnp = vnp->next)
5225   {
5226     if (vnp->choice == DEFLINE_FEATLIST
5227       && (itemmatch == NULL || itemmatch (vnp->data.ptrvalue)))
5228     {
5229       num_features++;
5230       if (! recurse_past_found_item)
5231       {
5232         return num_features;
5233       }
5234     }
5235     else if (vnp->choice == DEFLINE_CLAUSEPLUS
5236       && (fcp = vnp->data.ptrvalue) != NULL)
5237     {
5238       num_features += CountFeatures (fcp->featlist,
5239                                      itemmatch,
5240                                      recurse_past_found_item);
5241     }
5242   }
5243   return num_features;
5244 }
5245 
5246 /* The following section of code contains functions for grouping features. */
5247 
/* One grouping rule: identifies a kind of feature and lists the kinds of
 * features it may be grouped under.
 */
typedef struct matchruledata {
  matchFunction is_item;          /* selects the features this rule applies to */
  Int4          num_match_rules;  /* number of entries in match_rules */
  matchFunction *match_rules;     /* allocated array of predicates; each is tried
                                   * when searching for a clause to group under */
} MatchRuleData, PNTR MatchRulePtr;
5253 
/* Fills in the match list of mrp with the four predicates used for
 * top-level clauses: mobile element, endogenous virus source feature,
 * operon and gene cluster.  is_item is left untouched.
 *
 * On allocation failure match_rules is NULL and num_match_rules is 0, so
 * that code iterating over the rule never indexes a missing array.
 * A NULL mrp is ignored.
 */
static void InitRuleForTopLevelClauses (MatchRulePtr mrp)
{
  if (mrp == NULL)
  {
    return;
  }
  mrp->num_match_rules = 4;
  mrp->match_rules = MemNew (mrp->num_match_rules
                                    * sizeof (matchFunction));
  if (mrp->match_rules == NULL)
  {
    /* keep the count consistent with the missing array */
    mrp->num_match_rules = 0;
    return;
  }
  mrp->match_rules[0] = IsMobileElement;
  mrp->match_rules[1] = IsEndogenousVirusSourceFeature;
  mrp->match_rules[2] = IsOperon;
  mrp->match_rules[3] = IsGeneCluster;
}
5269 
/* Fills in the match list of mrp with the six predicates used for
 * bottom-level clauses: CDS, mRNA, gene, endogenous virus source feature,
 * operon and gene cluster.  is_item is left untouched.
 *
 * On allocation failure match_rules is NULL and num_match_rules is 0, so
 * that code iterating over the rule never indexes a missing array.
 * A NULL mrp is ignored.
 */
static void InitRuleForBottomLevelClauses (MatchRulePtr mrp)
{
  if (mrp == NULL)
  {
    return;
  }
  mrp->num_match_rules = 6;
  mrp->match_rules = MemNew (mrp->num_match_rules
                                    * sizeof (matchFunction));
  if (mrp->match_rules == NULL)
  {
    /* keep the count consistent with the missing array */
    mrp->num_match_rules = 0;
    return;
  }
  mrp->match_rules[0] = IsCDS;
  mrp->match_rules[1] = IsmRNA;
  mrp->match_rules[2] = IsGene;
  mrp->match_rules[3] = IsEndogenousVirusSourceFeature;
  mrp->match_rules[4] = IsOperon;
  mrp->match_rules[5] = IsGeneCluster;
}
5287 
5288 /* NumGroupingRules is the number of features for which there is a list of
5289  * features to group under.
5290  * When grouping features, each feature in the list is examined sequentially.
5291  * If there is a rule set that applies to that feature, the entire feature
5292  * list is searched for each feature type that this feature might group
5293  * beneath.  This preserves the biological order that was generated by the
5294  * original listing of features by sequence indexing.
5295  */
5296 #define  NumGroupingRules 13
InitializeGroupingRules()5297 static MatchRulePtr InitializeGroupingRules()
5298 {
5299   MatchRulePtr grouping_rules;
5300 
5301   grouping_rules = MemNew (NumGroupingRules * sizeof (MatchRuleData));
5302   if (grouping_rules == NULL) return NULL;
5303 
5304   grouping_rules[0].is_item = IsExon;
5305   grouping_rules[0].num_match_rules = 8;
5306   grouping_rules[0].match_rules = MemNew (grouping_rules[0].num_match_rules
5307                                     * sizeof (matchFunction));
5308   if (grouping_rules[0].match_rules == NULL) return NULL;
5309   grouping_rules[0].match_rules[0] = IsCDS;
5310   grouping_rules[0].match_rules[1] = IsNoncodingProductFeat;
5311   grouping_rules[0].match_rules[2] = IsDloop;
5312   grouping_rules[0].match_rules[3] = IsmRNA;
5313   grouping_rules[0].match_rules[4] = IsGene;
5314   grouping_rules[0].match_rules[5] = IsEndogenousVirusSourceFeature;
5315   grouping_rules[0].match_rules[6] = IsOperon;
5316   grouping_rules[0].match_rules[7] = IsGeneCluster;
5317 
5318   grouping_rules[1].is_item = IsIntron;
5319   grouping_rules[1].num_match_rules = 8;
5320   grouping_rules[1].match_rules = MemNew (grouping_rules[1].num_match_rules
5321                                     * sizeof (matchFunction));
5322   if (grouping_rules[1].match_rules == NULL) return NULL;
5323   grouping_rules[1].match_rules[0] = IsCDS;
5324   grouping_rules[1].match_rules[1] = IsNoncodingProductFeat;
5325   grouping_rules[1].match_rules[2] = IstRNA;
5326   grouping_rules[1].match_rules[3] = IsDloop;
5327   grouping_rules[1].match_rules[4] = IsGene;
5328   grouping_rules[1].match_rules[5] = IsEndogenousVirusSourceFeature;
5329   grouping_rules[1].match_rules[6] = IsOperon;
5330   grouping_rules[1].match_rules[7] = IsGeneCluster;
5331 
5332   grouping_rules[2].is_item = IsPromoter;
5333   InitRuleForBottomLevelClauses (grouping_rules + 2);
5334 
5335   grouping_rules[3].is_item = IsCDS;
5336   grouping_rules[3].num_match_rules = 5;
5337   grouping_rules[3].match_rules = MemNew (grouping_rules[3].num_match_rules
5338                                     * sizeof (matchFunction));
5339   if (grouping_rules[3].match_rules == NULL) return NULL;
5340   grouping_rules[3].match_rules[0] = IsmRNA;
5341   grouping_rules[3].match_rules[1] = IsMobileElement;
5342   grouping_rules[3].match_rules[2] = IsEndogenousVirusSourceFeature;
5343   grouping_rules[3].match_rules[3] = IsOperon;
5344   grouping_rules[3].match_rules[4] = IsGeneCluster;
5345 
5346   grouping_rules[4].is_item = IsMobileElement;
5347   InitRuleForTopLevelClauses (grouping_rules + 4);
5348 
5349   grouping_rules[5].is_item = Is3UTR;
5350   InitRuleForBottomLevelClauses (grouping_rules + 5);
5351 
5352   grouping_rules[6].is_item = Is5UTR;
5353   InitRuleForBottomLevelClauses (grouping_rules + 6);
5354 
5355   grouping_rules[7].is_item = IsLTR;
5356   InitRuleForBottomLevelClauses (grouping_rules + 7);
5357 
5358   grouping_rules[8].is_item = IsGene;
5359   InitRuleForTopLevelClauses (grouping_rules + 8);
5360 
5361   grouping_rules[9].is_item = IsIntergenicSpacer;
5362   InitRuleForTopLevelClauses (grouping_rules + 9);
5363 
5364   grouping_rules[10].is_item = IsNoncodingProductFeat;
5365   InitRuleForTopLevelClauses (grouping_rules + 10);
5366 
5367   grouping_rules[11].is_item = IsOperon;
5368   InitRuleForTopLevelClauses (grouping_rules + 11);
5369 
5370   grouping_rules[12].is_item = IsGeneCluster;
5371   InitRuleForTopLevelClauses (grouping_rules + 12);
5372 
5373   return grouping_rules;
5374 }
5375 
/* Releases a rule table created by InitializeGroupingRules: the match
 * list of each of the NumGroupingRules entries, then the table itself.
 * A NULL table is ignored.
 */
static void FreeGroupingRules(
  MatchRulePtr grouping_rules
)
{
  Int4 idx = 0;

  if (grouping_rules == NULL) return;

  while (idx < NumGroupingRules) {
    if (grouping_rules[idx].match_rules != NULL) {
      MemFree (grouping_rules[idx].match_rules);
    }
    grouping_rules[idx].match_rules = NULL;
    idx++;
  }
  MemFree (grouping_rules);
}
5392 
IsmRNASequence(BioseqPtr bsp)5393 static Boolean IsmRNASequence (BioseqPtr bsp)
5394 {
5395   SeqDescrPtr sdp;
5396   MolInfoPtr  mip;
5397 
5398   if (bsp == NULL || bsp->mol != Seq_mol_rna || bsp->descr == NULL)
5399   {
5400     return FALSE;
5401   }
5402   sdp = bsp->descr;
5403   while (sdp != NULL && sdp->choice != Seq_descr_molinfo)
5404   {
5405     sdp = sdp->next;
5406   }
5407   if (sdp == NULL || sdp->data.ptrvalue == NULL)
5408   {
5409     return FALSE;
5410   }
5411   mip = (MolInfoPtr) sdp->data.ptrvalue;
5412 
5413   if (mip->biomol == 3)
5414   {
5415     return TRUE;
5416   }
5417   else
5418   {
5419     return FALSE;
5420   }
5421 }
5422 
/* Best grouping candidate found so far by FindBestMatchCandidate. */
typedef struct matchcandidate
{
  ValNodePtr matched_clause;  /* list node of the matching feature; NULL until
                               * a candidate has been found */
  SeqLocPtr  slp;             /* location that was compared for that candidate;
                               * borrowed, not owned by this structure */
} MatchCandidateData, PNTR MatchCandidatePtr;
5428 
5429 /* This function searches the search_list for features that satisfy the
5430  * match function and satisfy locational requirements relative to the
5431  * clause.
5432  * If more than one clause meets the match requirements, the smallest one
5433  * is chosen.
5434  */
FindBestMatchCandidate(FeatureClausePtr clause,ValNodePtr search_list,FeatureClausePtr search_parent,matchFunction match,Boolean gene_cluster_opp_strand,BioseqPtr bsp,MatchCandidatePtr current_candidate)5435 static void FindBestMatchCandidate
5436 (FeatureClausePtr  clause,
5437  ValNodePtr        search_list,
5438  FeatureClausePtr  search_parent,
5439  matchFunction     match,
5440  Boolean           gene_cluster_opp_strand,
5441  BioseqPtr         bsp,
5442  MatchCandidatePtr current_candidate)
5443 {
5444   ValNodePtr       search_clause;
5445   SeqFeatPtr       addsfp, clause_sfp;
5446   FeatureClausePtr searchfcp;
5447   SeqLocPtr        slp;
5448 
5449   if (clause == NULL || clause->slp == NULL || current_candidate == NULL) return;
5450 
5451   clause_sfp = (SeqFeatPtr) (clause->featlist->data.ptrvalue);
5452 
5453   for (search_clause = search_list;
5454        search_clause != NULL;
5455        search_clause = search_clause->next)
5456   {
5457     if (search_clause->data.ptrvalue == clause)
5458       continue;
5459     if (search_clause->choice == DEFLINE_FEATLIST
5460       && search_clause->data.ptrvalue != NULL)
5461     {
5462       addsfp = search_clause->data.ptrvalue;
5463       /* slp is the location of the feature we are trying to
5464        * group this feature with
5465        */
5466       if (search_parent != NULL)
5467       {
5468         slp = search_parent->slp;
5469       }
5470       else
5471       {
5472         slp = addsfp->location;
5473       }
5474       if (match (search_clause->data.ptrvalue))
5475       {
5476         /* Transposons, insertion sequences, integrons, and endogenous virii
5477          * take subfeatures regardless of whether the subfeature is
5478          * on the same strand.
5479          * Gene Clusters can optionally take subfeatures on either
5480          * strand (gene_cluster_opp_strand is flag).
5481          * Promoters will match up to features that are adjacent.
5482          * Introns will match up to coding regions whose intervals
5483          * are adjacent to the endpoints of the intron, or to other
5484          * features if the intron location is inside the other feature.
5485          * All other feature matches must be that the feature to
5486          * go into the clause must fit inside the location of the
5487          * other clause.
5488          */
5489         if (((match == IsMobileElement
5490              || match == IsEndogenousVirusSourceFeature
5491              || (match == IsGeneCluster && gene_cluster_opp_strand))
5492             && SeqLocAinB (clause->slp, slp) > -1)
5493           || IsLocAInBonSameStrand (clause->slp, slp)
5494           || ( IsPromoter (clause_sfp)
5495             && search_parent != NULL
5496             && IsAAdjacentToB (clause->slp, search_parent->slp, bsp,
5497                                ADJACENT_TYPE_UPSTREAM, TRUE))
5498           || (IsmRNASequence (bsp)
5499               && match != IsMobileElement
5500               && match != IsEndogenousVirusSourceFeature
5501               && match != IsGeneCluster)
5502           || (match == IsCDS
5503               && IsIntron (clause_sfp)
5504               && IsAEmptyIntervalOfB (clause->slp, slp, bsp))
5505           || (match == IsCDS
5506               && IsExon (clause_sfp)
5507               && LocAContainsIntervalOfB (clause->slp, slp)))
5508         {
5509           /* if we don't already have a candidate, or if this
5510            * candidate's location is inside the current candidate,
5511            * take this candidate.
5512            */
5513           if (current_candidate->matched_clause == NULL
5514               || SeqLocAinB (slp, current_candidate->slp) > 0)
5515           {
5516             current_candidate->matched_clause = search_clause;
5517             current_candidate->slp = slp;
5518           }
5519         }
5520       }
5521     }
5522     else if (search_clause->choice == DEFLINE_CLAUSEPLUS
5523       && search_clause->data.ptrvalue != NULL)
5524     {
5525       searchfcp = search_clause->data.ptrvalue;
5526       FindBestMatchCandidate (clause, searchfcp->featlist, searchfcp,
5527                               match, gene_cluster_opp_strand, bsp,
5528                               current_candidate);
5529     }
5530   }
5531 }
5532 
5533 
5534 /* This function iterates through the matches in the specified grouping rule.
5535  * If more than one match is found, the clause with the smallest location is
5536  * used.
5537  * If a match is found, the clause is added to the list of clauses for that
5538  * feature's parent clause.
5539  */
GroupClauseByRule(FeatureClausePtr clause,ValNodePtr search_list,MatchRulePtr grouping_rule,Boolean gene_cluster_opp_strand,BioseqPtr bsp)5540 static Boolean GroupClauseByRule (
5541   FeatureClausePtr clause,
5542   ValNodePtr       search_list,
5543   MatchRulePtr     grouping_rule,
5544   Boolean          gene_cluster_opp_strand,
5545   BioseqPtr        bsp
5546 )
5547 {
5548   Int4               rule_index;
5549   MatchCandidateData mcd;
5550   Boolean            rval = FALSE;
5551   ValNodePtr         newfeat;
5552 
5553   mcd.slp = NULL;
5554   mcd.matched_clause = NULL;
5555 
5556   for (rule_index = 0;
5557        rule_index < grouping_rule->num_match_rules;
5558        rule_index ++)
5559   {
5560 
5561     FindBestMatchCandidate (clause, search_list, NULL,
5562                             grouping_rule->match_rules[rule_index],
5563                             gene_cluster_opp_strand, bsp, &mcd);
5564   }
5565   if (mcd.matched_clause != NULL)
5566   {
5567     newfeat = ValNodeNew (mcd.matched_clause);
5568     if (newfeat == NULL) return FALSE;
5569     newfeat->choice = DEFLINE_CLAUSEPLUS;
5570     newfeat->data.ptrvalue = clause;
5571     rval = TRUE;
5572   }
5573   return rval;
5574 }
5575 
5576 
5577 /* This function determines whether a subclause contains just a 3' UTR feature
5578  * and no other details.
5579  */
Is3UTRClause(FeatureClausePtr clause)5580 static Boolean Is3UTRClause (FeatureClausePtr clause)
5581 {
5582   if (clause == NULL
5583       || clause->featlist == NULL
5584       || clause->featlist->choice != DEFLINE_FEATLIST
5585       || clause->featlist->data.ptrvalue == NULL
5586       || clause->featlist->next != NULL)
5587   {
5588     return FALSE;
5589   }
5590   return Is3UTR(clause->featlist->data.ptrvalue);
5591 }
5592 
5593 
5594 /* This function will move 3' UTRs to the end of any subfeat lists
5595  * so that they can be listed after the partial/complete CDS.
5596  */
Move3UTRToEndOfSubFeatList(ValNodePtr clause_list)5597 static void Move3UTRToEndOfSubFeatList (ValNodePtr clause_list)
5598 {
5599   ValNodePtr vnp, prev, last_vnp;
5600   FeatureClausePtr clause;
5601 
5602   if (clause_list == NULL || clause_list->next == NULL)
5603   {
5604     return;
5605   }
5606   prev = clause_list;
5607   for (vnp = clause_list->next; vnp != NULL; vnp = vnp->next)
5608   {
5609     if (vnp->data.ptrvalue != NULL && vnp->choice == DEFLINE_CLAUSEPLUS)
5610     {
5611       clause = vnp->data.ptrvalue;
5612       if (Is3UTRClause (clause))
5613       {
5614         if (vnp->next != NULL)
5615         {
5616           /* move to end of clause list */
5617           last_vnp = vnp->next;
5618           while (last_vnp->next != NULL)
5619           {
5620             last_vnp = last_vnp->next;
5621           }
5622           prev->next = vnp->next;
5623           last_vnp->next = vnp;
5624           vnp->next = NULL;
5625         }
5626       }
5627       else
5628       {
5629         prev = vnp;
5630         Move3UTRToEndOfSubFeatList (clause->featlist);
5631       }
5632     }
5633     else
5634     {
5635       prev = vnp;
5636     }
5637   }
5638 }
5639 
5640 /* This function iterates over the list of features, attempting to find and
5641  * apply grouping rules for each feature.
5642  */
GroupAllClauses(ValNodePtr PNTR clause_list,Boolean gene_cluster_opp_strand,BioseqPtr bsp)5643 static void GroupAllClauses (
5644   ValNodePtr PNTR clause_list,
5645   Boolean         gene_cluster_opp_strand,
5646   BioseqPtr       bsp
5647 )
5648 {
5649   MatchRulePtr     grouping_rules;
5650   ValNodePtr       vnp, prev;
5651   FeatureClausePtr clause;
5652   SeqFeatPtr       main_feat;
5653   Int4             rule_index;
5654 
5655   grouping_rules = InitializeGroupingRules();
5656   if (grouping_rules == NULL) return;
5657 
5658   for (vnp = *clause_list; vnp != NULL; vnp = vnp->next)
5659   {
5660     if (vnp->choice == DEFLINE_CLAUSEPLUS && vnp->data.ptrvalue != NULL)
5661     {
5662       clause = vnp->data.ptrvalue;
5663       if (clause->featlist != NULL
5664         && clause->featlist->choice == DEFLINE_FEATLIST
5665         && clause->featlist->data.ptrvalue != NULL)
5666       {
5667         main_feat = clause->featlist->data.ptrvalue;
5668         for (rule_index = 0;
5669              rule_index < NumGroupingRules
5670                && ! grouping_rules[rule_index].is_item (main_feat);
5671              rule_index++)
5672         {
5673         }
5674         if (rule_index < NumGroupingRules)
5675         {
5676           if ( GroupClauseByRule (clause, *clause_list,
5677                                   grouping_rules + rule_index,
5678                                   gene_cluster_opp_strand,
5679                                   bsp))
5680           {
5681             vnp->data.ptrvalue = NULL;
5682           }
5683         }
5684       }
5685     }
5686   }
5687   FreeGroupingRules(grouping_rules);
5688 
5689   vnp = *clause_list;
5690   prev = NULL;
5691   while (vnp != NULL)
5692   {
5693     if (vnp->data.ptrvalue == NULL)
5694     {
5695       if (prev == NULL)
5696       {
5697         *clause_list = vnp->next;
5698         vnp->next = NULL;
5699         ValNodeFree (vnp);
5700         vnp = *clause_list;
5701       }
5702       else
5703       {
5704         prev->next = vnp->next;
5705         vnp->next = NULL;
5706         ValNodeFree (vnp);
5707         vnp = prev->next;
5708       }
5709     }
5710     else
5711     {
5712       prev = vnp;
5713       vnp = vnp->next;
5714     }
5715   }
5716 
5717   Move3UTRToEndOfSubFeatList (*clause_list);
5718 }
5719 
5720 /* This function exists to handle the special case where two or more exons
5721  * are alternatively spliced, but there are no CDSs to represent some of the
5722  * alternatively spliced forms.  In order to make sure that all of the exons
5723  * that are alternatively spliced together appear with the CDS, they are
5724  * temporarily consolidated into a single clause with a location that
5725  * is the intersection of the exons' locations.  The clause will be
5726  * re-expanded after grouping by the ExpandAltSplicedExons function.
5727  */
GroupAltSplicedExons(ValNodePtr PNTR clause_list,BioseqPtr bsp,Boolean delete_now)5728 static void GroupAltSplicedExons (
5729   ValNodePtr PNTR clause_list,
5730   BioseqPtr       bsp,
5731   Boolean         delete_now
5732 )
5733 {
5734   ValNodePtr       clause, search_clause, vnp;
5735   FeatureClausePtr fcp, search_fcp;
5736   SeqFeatPtr       sfp, search_sfp;
5737   SeqLocPtr        new_slp;
5738 
5739   if (clause_list == NULL) return;
5740 
5741   for (clause = *clause_list; clause != NULL; clause = clause->next)
5742   {
5743     if (clause->choice != DEFLINE_CLAUSEPLUS
5744       || clause->data.ptrvalue == NULL)
5745     {
5746       continue;
5747     }
5748     fcp = clause->data.ptrvalue;
5749     if ( ! fcp->is_alt_spliced
5750       || fcp->delete_me
5751       || fcp->featlist == NULL
5752       || fcp->featlist->choice != DEFLINE_FEATLIST)
5753     {
5754       continue;
5755     }
5756     sfp = fcp->featlist->data.ptrvalue;
5757     if ( ! IsExon (sfp))
5758     {
5759       continue;
5760     }
5761 
5762     for ( search_clause = clause->next;
5763           search_clause != NULL
5764             && search_clause->choice == DEFLINE_CLAUSEPLUS
5765             && search_clause->data.ptrvalue != NULL
5766             && (search_fcp = search_clause->data.ptrvalue) != NULL
5767             && ! search_fcp->delete_me
5768             && search_fcp->is_alt_spliced
5769             && search_fcp->featlist != NULL
5770             && search_fcp->featlist->choice == DEFLINE_FEATLIST
5771             && (search_sfp = search_fcp->featlist->data.ptrvalue) != NULL
5772             && IsExon (search_sfp)
5773             && TestFeatOverlap (sfp, search_sfp, SIMPLE_OVERLAP) != -1;
5774           search_clause = search_clause->next)
5775     {
5776       vnp = ValNodeNew (fcp->featlist);
5777       if (vnp == NULL) return;
5778       vnp->choice = DEFLINE_FEATLIST;
5779       vnp->data.ptrvalue = search_sfp;
5780       search_fcp->delete_me = TRUE;
5781       new_slp = SeqLocIntersection (fcp->slp, search_fcp->slp, bsp);
5782       SeqLocFree (fcp->slp);
5783       fcp->slp = new_slp;
5784     }
5785   }
5786   if (delete_now)
5787   {
5788     DeleteFeatureClauses (clause_list);
5789   }
5790 }
5791 
5792 /* This function expands a clause filled with alternatively-spliced exons
5793  * that was created in the GroupAltSplicedExons function.
5794  */
ExpandAltSplicedExons(ValNodePtr clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)5795 static void ExpandAltSplicedExons (
5796   ValNodePtr clause_list,
5797   BioseqPtr  bsp,
5798   DeflineFeatureRequestListPtr rp)
5799 {
5800   ValNodePtr clause, rest_of_list, featlist, new_clause = NULL;
5801   FeatureClausePtr fcp, new_fcp;
5802   SeqFeatPtr sfp;
5803 
5804   for (clause = clause_list;
5805        clause != NULL;
5806        clause = clause->next)
5807   {
5808     if (clause->choice != DEFLINE_CLAUSEPLUS
5809       || (fcp = clause->data.ptrvalue) == NULL
5810       || fcp->featlist == NULL)
5811     {
5812       continue;
5813     }
5814     if ( fcp->featlist->choice == DEFLINE_FEATLIST
5815       && (sfp = fcp->featlist->data.ptrvalue) != NULL
5816       && IsExon (sfp)
5817       && fcp->featlist->next != NULL
5818       && fcp->featlist->next->choice == DEFLINE_FEATLIST
5819       && IsExon (fcp->featlist->next->data.ptrvalue))
5820     {
5821       rest_of_list = clause->next;
5822       clause->next = NULL;
5823       for (featlist = fcp->featlist->next;
5824            featlist != NULL
5825              && featlist->choice == DEFLINE_FEATLIST
5826              && IsExon (featlist->data.ptrvalue);
5827            featlist = featlist->next)
5828       {
5829         new_clause = ValNodeNew (clause);
5830         if (new_clause == NULL) return;
5831         new_fcp = NewFeatureClause (featlist->data.ptrvalue, bsp, rp);
5832         if (new_fcp == NULL) return;
5833         new_fcp->grp = fcp->grp;
5834         new_fcp->is_alt_spliced = fcp->is_alt_spliced;
5835         new_fcp->make_plural = fcp->make_plural;
5836         new_clause->choice = DEFLINE_CLAUSEPLUS;
5837         new_clause->data.ptrvalue = new_fcp;
5838       }
5839       ValNodeFree (fcp->featlist->next);
5840       fcp->featlist->next = NULL;
5841       if (new_clause != NULL) {
5842         new_clause->next = rest_of_list;
5843       }
5844 
5845       /* put back location for first exon - was reduced to union of
5846        * all exon intervals in GroupAltSplicedExons
5847        */
5848       SeqLocFree (fcp->slp);
5849       sfp = fcp->featlist->data.ptrvalue;
5850       fcp->slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, TRUE, FALSE);
5851     }
5852     else
5853     {
5854       ExpandAltSplicedExons (fcp->featlist, bsp, rp);
5855     }
5856   }
5857 }
5858 
5859 
5860 
DoFeaturesShareGene(SeqFeatPtr sfp1,SeqFeatPtr sfp2)5861 static Boolean DoFeaturesShareGene (SeqFeatPtr sfp1, SeqFeatPtr sfp2)
5862 {
5863   Boolean share_gene = FALSE;
5864   SeqFeatPtr found_gene1, found_gene2;
5865 
5866   if (sfp1 != NULL && sfp2 != NULL
5867       && !SeqMgrGeneIsSuppressed (SeqMgrGetGeneXref(sfp1))
5868       && !SeqMgrGeneIsSuppressed (SeqMgrGetGeneXref(sfp2)))
5869   {
5870     found_gene1 = SeqMgrGetOverlappingGene (sfp1->location, NULL);
5871     found_gene2 = SeqMgrGetOverlappingGene (sfp2->location, NULL);
5872     if (found_gene1 == found_gene2 && found_gene1 != NULL)
5873     {
5874       share_gene = TRUE;
5875     }
5876   }
5877   return share_gene;
5878 }
5879 
5880 /* This function determines whether two features share the same product name */
5881 static Boolean
DoProductNamesMatch(SeqFeatPtr sfp1,SeqFeatPtr sfp2,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)5882 DoProductNamesMatch
5883 (SeqFeatPtr sfp1,
5884  SeqFeatPtr sfp2,
5885  BioseqPtr  bsp,
5886  DeflineFeatureRequestListPtr rp)
5887 {
5888   CharPtr productname1;
5889   CharPtr productname2;
5890   Boolean names_match = FALSE;
5891 
5892   productname1 = GetProductName (sfp1, bsp, rp);
5893   productname2 = GetProductName (sfp2, bsp, rp);
5894   if (StringHasNoText (productname1) && StringHasNoText (productname2))
5895   {
5896     names_match = TRUE;
5897   }
5898   else if (StringCmp (productname1, productname2) == 0)
5899   {
5900     names_match = TRUE;
5901   }
5902 
5903   productname1 = MemFree (productname1);
5904   productname2 = MemFree (productname2);
5905 
5906   return names_match;
5907 }
5908 
5909 /* This function should combine CDSs that do not have a joined location
5910  * but are part of the same gene and have the same protein name.
5911  */
GroupSegmentedCDSs(ValNodePtr PNTR clause_list,BioseqPtr bsp,Boolean delete_now,DeflineFeatureRequestListPtr rp)5912 static void GroupSegmentedCDSs (
5913   ValNodePtr PNTR clause_list,
5914   BioseqPtr       bsp,
5915   Boolean         delete_now,
5916   DeflineFeatureRequestListPtr rp
5917 )
5918 {
5919   ValNodePtr       clause, search_clause, vnp;
5920   FeatureClausePtr fcp, search_fcp;
5921   SeqFeatPtr       sfp, search_sfp;
5922   SeqLocPtr        new_slp;
5923 
5924   if (clause_list == NULL) return;
5925 
5926   for (clause = *clause_list; clause != NULL; clause = clause->next)
5927   {
5928     if (clause->choice != DEFLINE_CLAUSEPLUS
5929       || clause->data.ptrvalue == NULL)
5930     {
5931       continue;
5932     }
5933     fcp = clause->data.ptrvalue;
5934     if (fcp->delete_me
5935       || fcp->featlist == NULL
5936       || fcp->featlist->choice != DEFLINE_FEATLIST)
5937     {
5938       continue;
5939     }
5940     sfp = fcp->featlist->data.ptrvalue;
5941     if ( ! IsCDS (sfp))
5942     {
5943       continue;
5944     }
5945 
5946     for ( search_clause = clause->next;
5947           search_clause != NULL;
5948           search_clause = search_clause->next)
5949     {
5950       if (search_clause->choice != DEFLINE_CLAUSEPLUS
5951           || search_clause->data.ptrvalue == NULL
5952           || (search_fcp = search_clause->data.ptrvalue) == NULL
5953           || search_fcp->delete_me
5954           || search_fcp->featlist == NULL
5955           || search_fcp->featlist->choice != DEFLINE_FEATLIST
5956           || (search_sfp = search_fcp->featlist->data.ptrvalue) == NULL
5957           || ! IsCDS (search_sfp)
5958           || ! DoFeaturesShareGene (sfp, search_sfp)
5959           || ! DoProductNamesMatch (sfp, search_sfp, bsp, rp))
5960       {
5961         continue;
5962       }
5963       vnp = ValNodeNew (fcp->featlist);
5964       if (vnp == NULL) return;
5965       vnp->choice = DEFLINE_FEATLIST;
5966       vnp->data.ptrvalue = search_sfp;
5967       search_fcp->delete_me = TRUE;
5968       new_slp = SeqLocMerge (bsp, fcp->slp, search_fcp->slp,
5969                              FALSE, TRUE, FALSE);
5970 
5971       SeqLocFree (fcp->slp);
5972       fcp->slp = new_slp;
5973     }
5974   }
5975   if (delete_now)
5976   {
5977     DeleteFeatureClauses (clause_list);
5978   }
5979 }
5980 
5981 
5982 /* This function searches this list for clauses to which this gene should
5983  * apply.  This is not taken care of by the GroupAllClauses function
5984  * because genes are added to clauses as a GeneRefPtr instead of as an
5985  * additional feature in the list, and because a gene can apply to more
5986  * than one clause, while other features should really only belong to
5987  * one clause.
5988  */
AddGeneToClauses(SeqFeatPtr gene,CharPtr gene_productname,ValNodePtr clause_list,Boolean suppress_locus_tag)5989 static Boolean AddGeneToClauses
5990 ( SeqFeatPtr gene,
5991   CharPtr    gene_productname,
5992   ValNodePtr clause_list,
5993   Boolean    suppress_locus_tag)
5994 {
5995   ValNodePtr    clause;
5996   FeatureClausePtr fcp;
5997   SeqFeatPtr    sfp, found_gene;
5998   GeneRefPtr    grp;
5999   Boolean    used_gene;
6000 
6001   if (gene == NULL || gene->data.value.ptrvalue == NULL) return FALSE;
6002   if (clause_list == NULL) return FALSE;
6003 
6004   used_gene = FALSE;
6005   grp = gene->data.value.ptrvalue;
6006   for (clause = clause_list; clause != NULL; clause = clause->next)
6007   {
6008     fcp = clause->data.ptrvalue;
6009     if (fcp == NULL || fcp->featlist == NULL) return FALSE;
6010     sfp = fcp->featlist->data.ptrvalue;
6011     if (sfp != NULL && !SeqMgrGeneIsSuppressed (SeqMgrGetGeneXref(sfp))
6012         && (IsCDS (sfp)
6013             || IsrRNA (sfp)
6014             || IstRNA (sfp)
6015             || IsmRNA (sfp)
6016             || IsMiscRNA (sfp)
6017             || IsncRNA (sfp)
6018             || IsPrecursorRNA (sfp)
6019             || IsNoncodingProductFeat (sfp)))
6020     {
6021       if (fcp->grp == NULL)
6022       {
6023         found_gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
6024         if (found_gene != NULL)
6025         {
6026           fcp->grp = (GeneRefPtr) found_gene->data.value.ptrvalue;
6027         }
6028       }
6029 
6030       if (fcp->grp != NULL && DoGenesMatch (fcp->grp, grp, suppress_locus_tag))
6031       {
6032         used_gene = TRUE;
6033         if (gene_productname != NULL
6034           && fcp->feature_label_data.productname == NULL
6035           && IsCDS (sfp))
6036         {
6037           fcp->feature_label_data.productname =
6038               StringSave (gene_productname);
6039         }
6040       }
6041       else if (fcp->grp == NULL
6042         && IsLocAInBonSameStrand (sfp->location, gene->location))
6043       {
6044         fcp->grp = grp;
6045         used_gene = TRUE;
6046         if (gene_productname != NULL
6047           && fcp->feature_label_data.productname == NULL
6048           && IsCDS (sfp))
6049         {
6050           fcp->feature_label_data.productname =
6051               StringSave (gene_productname);
6052         }
6053       }
6054     }
6055   }
6056   return used_gene;
6057 }
6058 
6059 /* This function iterates through the list of features and calls
6060  * AddGeneToClauses for each gene feature it finds.
6061  */
GroupGenes(ValNodePtr PNTR clause_list,Boolean suppress_locus_tag)6062 static void GroupGenes (ValNodePtr PNTR clause_list, Boolean suppress_locus_tag)
6063 {
6064   ValNodePtr  vnp;
6065   ValNodePtr  featlist;
6066   FeatureClausePtr fcp;
6067 
6068   for (vnp = *clause_list; vnp != NULL; vnp = vnp->next)
6069   {
6070     if (vnp->choice != DEFLINE_CLAUSEPLUS) return;
6071     fcp = (FeatureClausePtr) vnp->data.ptrvalue;
6072     if (fcp == NULL) return;
6073 
6074     featlist = fcp->featlist;
6075     if (featlist != NULL
6076       && featlist->choice == DEFLINE_FEATLIST
6077       && IsGene (featlist->data.ptrvalue))
6078     {
6079       AddGeneToClauses (featlist->data.ptrvalue,
6080                         fcp->feature_label_data.productname,
6081                         vnp->next, suppress_locus_tag);
6082     }
6083   }
6084 }
6085 
6086 /* This function searches this list for clauses to which this mRNA should
6087  * apply.  This is not taken care of by the GroupAllClauses function
6088  * because when an mRNA is added to a CDS, the product for the clause is
6089  * replaced and the location for the clause is expanded, rather than simply
6090  * adding the mRNA as an additional feature in the list, and because an
6091  * mRNA can apply to more than one clause, while other features should
6092  * really only belong to one clause.
6093  */
AddmRNAToClauses(SeqFeatPtr mRNA,ValNodePtr clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6094 static Boolean AddmRNAToClauses
6095 ( SeqFeatPtr mRNA,
6096   ValNodePtr clause_list,
6097   BioseqPtr  bsp,
6098   DeflineFeatureRequestListPtr rp)
6099 {
6100   ValNodePtr    clause;
6101   FeatureClausePtr fcp;
6102   SeqFeatPtr    sfp;
6103   Boolean    used_mRNA;
6104   CharPtr       productname;
6105   SeqLocPtr     new_slp;
6106 
6107   if (mRNA == NULL || mRNA->data.value.ptrvalue == NULL) return FALSE;
6108   if (clause_list == NULL) return FALSE;
6109 
6110   used_mRNA = FALSE;
6111   productname = GetProductName (mRNA, bsp, rp);
6112   if (productname == NULL) return TRUE;
6113 
6114   for (clause = clause_list; clause != NULL; clause = clause->next)
6115   {
6116     fcp = clause->data.ptrvalue;
6117     if (fcp == NULL || fcp->featlist == NULL) return FALSE;
6118     sfp = fcp->featlist->data.ptrvalue;
6119     if (sfp == NULL)
6120     {
6121     }
6122     else if (IsCDS (sfp)
6123       && fcp->feature_label_data.productname != NULL
6124       && StringCmp (fcp->feature_label_data.productname, productname) == 0)
6125     {
6126       used_mRNA = TRUE;
6127       fcp->has_mrna = TRUE;
6128       if (IsLocAInBonSameStrand (sfp->location, mRNA->location))
6129       {
6130         new_slp = SeqLocMerge (bsp, fcp->slp, mRNA->location,
6131                                  FALSE, TRUE, FALSE);
6132         if (new_slp == NULL) return FALSE;
6133         if (fcp->slp != NULL)
6134         {
6135           SeqLocFree (fcp->slp);
6136         }
6137         fcp->slp = new_slp;
6138       }
6139     }
6140     else if (fcp->feature_label_data.productname == NULL
6141       && (IsCDS (sfp) || IsGene (sfp))
6142       && (IsLocAInBonSameStrand (sfp->location, mRNA->location)
6143         || IsLocAInBonSameStrand (mRNA->location, sfp->location)))
6144     {
6145       fcp->feature_label_data.productname = StringSave (productname);
6146       used_mRNA = TRUE;
6147       fcp->has_mrna = TRUE;
6148       new_slp = SeqLocMerge (bsp, fcp->slp, mRNA->location,
6149                                  FALSE, TRUE, FALSE);
6150       if (new_slp == NULL) return FALSE;
6151       if (fcp->slp != NULL)
6152       {
6153         SeqLocFree (fcp->slp);
6154       }
6155       fcp->slp = new_slp;
6156     }
6157   }
6158   return used_mRNA;
6159 }
6160 
6161 /* This function iterates through the list of features and calls
6162  * AddmRNAToClauses for each mRNA feature it finds.
6163  */
GroupmRNAs(ValNodePtr PNTR clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6164 static void GroupmRNAs (
6165   ValNodePtr PNTR clause_list,
6166   BioseqPtr  bsp,
6167   DeflineFeatureRequestListPtr rp
6168 )
6169 {
6170   ValNodePtr  vnp;
6171   ValNodePtr  featlist;
6172   FeatureClausePtr fcp;
6173 
6174   for (vnp = *clause_list; vnp != NULL; vnp = vnp->next)
6175   {
6176     if (vnp->choice != DEFLINE_CLAUSEPLUS) return;
6177     fcp = (FeatureClausePtr) vnp->data.ptrvalue;
6178     if (fcp == NULL) return;
6179 
6180     featlist = fcp->featlist;
6181     if (featlist != NULL
6182       && featlist->choice == DEFLINE_FEATLIST
6183       && IsmRNA (featlist->data.ptrvalue))
6184     {
6185       if (AddmRNAToClauses (featlist->data.ptrvalue, *clause_list, bsp, rp))
6186       {
6187         fcp->delete_me = TRUE;
6188       }
6189     }
6190   }
6191   DeleteFeatureClauses (clause_list);
6192 }
6193 
6194 /* This section of code contains functions for generating labels for
6195  * clauses for the definition lines.
6196  */
6197 
6198 /* This function examines the specified typeword and determines whether it
6199  * should appear before or after the description of the feature in the
6200  * definition line.
6201  */
IsTypeWordFirst(CharPtr typeword)6202 static Boolean IsTypeWordFirst (
6203   CharPtr typeword
6204 )
6205 {
6206   Int4 i;
6207   if (typeword == NULL) return FALSE;
6208   if (StringCmp (typeword, "exon") == 0
6209     || StringCmp (typeword, "intron") == 0
6210     || StringCmp (typeword, "endogenous virus") == 0)
6211   {
6212     return TRUE;
6213   }
6214   else
6215   {
6216     i = StartsWithMobileElementKeyword (typeword);
6217     if (i >= 0 && i != eMobileElementIntegron) {
6218       return TRUE;
6219     }
6220     return FALSE;
6221   }
6222 }
6223 
6224 /* This function determines the word to use to indicate what type of feature
6225  * is being described in the definition line.  For certain feature types,
6226  * the word to use in the definition line varies based on the type of
6227  * molecule in the record.
6228  */
GetFeatureTypeWord(Uint1 biomol,SeqFeatPtr sfp)6229 static CharPtr GetFeatureTypeWord (
6230   Uint1 biomol,
6231   SeqFeatPtr sfp
6232 )
6233 {
6234   if (sfp == NULL) return NULL;
6235   if ( IsExon (sfp))
6236   {
6237     return StringSave ("exon");
6238   }
6239   else if(IsIntron (sfp))
6240   {
6241     return StringSave ("intron");
6242   }
6243   else if (IsEndogenousVirusSequence (sfp))
6244   {
6245     return StringSave ("endogenous virus");
6246   }
6247   else if (IsControlRegion (sfp))
6248   {
6249     return StringSave ("control region");
6250   }
6251   else if (IsEndogenousVirusSourceFeature (sfp))
6252   {
6253     return StringSave ("endogenous virus");
6254   }
6255   else if (IsDloop (sfp))
6256   {
6257     return StringSave ("D-loop");
6258   }
6259   else if (IsLTR (sfp))
6260   {
6261     return StringSave ("LTR");
6262   }
6263   else if (Is3UTR (sfp))
6264   {
6265     return StringSave ("3' UTR");
6266   }
6267   else if (Is5UTR (sfp))
6268   {
6269     return StringSave ("5' UTR");
6270   }
6271   else if (IsOperon (sfp))
6272   {
6273     return StringSave ("operon");
6274   }
6275   else if (biomol == MOLECULE_TYPE_GENOMIC || biomol == MOLECULE_TYPE_CRNA)
6276   {
6277     if (IsPseudo (sfp))
6278     {
6279       return StringSave ("pseudogene");
6280     }
6281     else
6282     {
6283       return StringSave ("gene");
6284     }
6285   }
6286   else if ( IsrRNA (sfp) || IsncRNA (sfp) || IsPrecursorRNA (sfp))
6287   {
6288     return NULL;
6289   }
6290   else if (biomol == MOLECULE_TYPE_MRNA)
6291   {
6292     if (IsPseudo (sfp))
6293     {
6294       return StringSave ("pseudogene mRNA");
6295     }
6296     else
6297     {
6298       return StringSave ("mRNA");
6299     }
6300   }
6301   else if (biomol == MOLECULE_TYPE_PRE_MRNA)
6302   {
6303     if (IsPseudo (sfp))
6304     {
6305       return StringSave ("pseudogene precursor RNA");
6306     }
6307     else
6308     {
6309       return StringSave ("precursor RNA");
6310     }
6311   }
6312   else if (biomol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL)
6313   {
6314     return StringSave ("gene");
6315   }
6316   return StringSave ("");
6317 }
6318 
6319 /* Frequently the product associated with a feature is listed as part of the
6320  * description of the feature in the definition line.  This function determines
6321  * the name of the product associated with this specific feature.  Some
6322  * features will be listed with the product of a feature that is associated
6323  * with the feature being described - this function does not look at other
6324  * features to determine a product name.
6325  * If the feature is a misc_feat with particular keywords in the comment,
6326  * the product will be determined based on the contents of the comment.
6327  * If the feature is a CDS and is marked as pseudo, the product will be
6328  * determined based on the contents of the comment.
6329  * If the feature is a gene and has different strings in the description than
6330  * in the locus or locus tag, the description will be used as the product for
6331  * the gene.
6332  * If none of the above conditions apply, the sequence indexing context label
6333  * will be used to obtain the product name for the feature.
6334  */
GetProductName(SeqFeatPtr cds,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6335 static CharPtr GetProductName
6336 ( SeqFeatPtr cds,
6337   BioseqPtr  bsp,
6338   DeflineFeatureRequestListPtr rp)
6339 {
6340   CharPtr protein_name;
6341   CharPtr semicolon;
6342   size_t len_to_copy;
6343   SeqMgrFeatContext  context;
6344   GeneRefPtr  grp;
6345   CharPtr gene_name;
6346   RnaRefPtr rrp;
6347   RNAGenPtr rgp;
6348   Boolean suppress_locus_tag = FALSE;
6349 
6350   if (cds == NULL) return NULL;
6351   protein_name = NULL;
6352   if (rp != NULL)
6353   {
6354     suppress_locus_tag = rp->suppress_locus_tags;
6355   }
6356   if (IsNoncodingProductFeat (cds))
6357   {
6358     return GetNoncodingProductFeatProduct (cds);
6359   }
6360   else if (cds->data.choice == SEQFEAT_CDREGION && cds->pseudo)
6361   {
6362     if (cds->comment != NULL)
6363     {
6364       semicolon = StringChr (cds->comment, ';');
6365       if (semicolon != NULL)
6366       {
6367         len_to_copy = semicolon - cds->comment;
6368       }
6369       else
6370       {
6371         len_to_copy = StringLen (cds->comment);
6372       }
6373       protein_name = MemNew (len_to_copy + 1);
6374       if (protein_name == NULL) return NULL;
6375       StringNCpy (protein_name, cds->comment, len_to_copy);
6376       protein_name[len_to_copy] = 0;
6377     }
6378     return protein_name;
6379   }
6380   else if (cds->data.choice == SEQFEAT_GENE)
6381   {
6382     grp = (GeneRefPtr) cds->data.value.ptrvalue;
6383     if (grp == NULL) return NULL;
6384     gene_name = GetGeneName (grp, suppress_locus_tag);
6385     if (grp->desc != NULL
6386       && StringCmp (grp->desc, gene_name) != 0)
6387     {
6388       return StringSave (grp->desc);
6389     }
6390 #if 0
6391     /* removed by request from Linda Yankie */
6392     if (grp->locus_tag != NULL && ! suppress_locus_tag
6393       && StringCmp (grp->locus_tag, gene_name) != 0)
6394     {
6395       return StringSave (grp->locus_tag);
6396     }
6397 #endif
6398   }
6399   else if (IsncRNA (cds))
6400   {
6401     return GetncRNAProduct(cds, rp == NULL ? FALSE : rp->use_ncrna_note);
6402   }
6403   else if (IstRNA (cds)
6404            && SeqMgrGetDesiredFeature (0, bsp, 0, 0, cds, &context) == cds
6405            && context.label != NULL)
6406   {
6407     if (StringCmp (context.label, "Xxx") == 0) {
6408       protein_name = StringSave ("tRNA-OTHER");
6409     } else {
6410       protein_name = MemNew ( StringLen (context.label) + 6);
6411       if ( protein_name == NULL) return NULL;
6412       sprintf (protein_name, "tRNA-%s", context.label);
6413     }
6414     return protein_name;
6415   }
6416   else if (cds->data.choice == SEQFEAT_RNA)
6417   {
6418     if ((rrp = (RnaRefPtr) cds->data.value.ptrvalue) != NULL)
6419     {
6420       if (rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue))
6421       {
6422         return StringSave (rrp->ext.value.ptrvalue);
6423       }
6424       else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && !StringHasNoText (rgp->product))
6425       {
6426         return StringSave (rgp->product);
6427       }
6428       else if (IsPrecursorRNA (cds))
6429       {
6430         return StringSave ("precursor RNA");
6431       }
6432     }
6433   }
6434   else if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, cds, &context) == cds
6435            && context.label != NULL)
6436   {
6437     if ((IsCDS(cds) && StringCmp (context.label, "CDS") != 0)
6438         || (IsmRNA(cds) && StringCmp (context.label, "mRNA") != 0)
6439         || (! IsCDS(cds) && ! IsmRNA(cds)))
6440     {
6441       protein_name = StringSave (context.label);
6442       return protein_name;
6443     }
6444   }
6445   return NULL;
6446 }
6447 
6448 /* This function searches a list of features recursively for a
6449  * feature that satisfies the itemmatch condition and is associated with
6450  * the same gene as the fcp clause passed to the function.
6451  * This is used to obtain a product for a feature that may share a gene with
6452  * a product-producing feature but may not be contained in the interval of
6453  * the product-producing feature.
6454  */
FindProductInFeatureList(FeatureClausePtr fcp,ValNodePtr clause_list,matchFunction itemmatch,Boolean suppress_locus_tag)6455 static FeatureClausePtr FindProductInFeatureList (
6456   FeatureClausePtr fcp,
6457   ValNodePtr       clause_list,
6458   matchFunction    itemmatch,
6459   Boolean          suppress_locus_tag)
6460 {
6461   ValNodePtr       vnp;
6462   FeatureClausePtr vnp_fcp;
6463 
6464   for (vnp = clause_list; vnp != NULL; vnp = vnp->next)
6465   {
6466     if (vnp->choice == DEFLINE_CLAUSEPLUS && vnp->data.ptrvalue != NULL)
6467     {
6468       vnp_fcp = vnp->data.ptrvalue;
6469       if (DoGenesMatch (vnp_fcp->grp, fcp->grp, suppress_locus_tag)
6470         && vnp_fcp->featlist != NULL
6471         && vnp_fcp->featlist->choice == DEFLINE_FEATLIST
6472         && itemmatch (vnp_fcp->featlist->data.ptrvalue))
6473       {
6474         return vnp_fcp;
6475       }
6476       else
6477       {
6478         vnp_fcp = FindProductInFeatureList (fcp, vnp_fcp->featlist,
6479                                             itemmatch, suppress_locus_tag);
6480         if (vnp_fcp != NULL) return vnp_fcp;
6481       }
6482     }
6483   }
6484   return NULL;
6485 }
6486 
6487 /* This function uses the available information in the clause to generate
6488  * a description from the name of the gene (if any) and the name of the
6489  * product for the feature (if any).
6490  * If there is only a gene, the description will be the name of the gene.
6491  * If there is only a product, the description will be the name of the product.
6492  * If there is a gene and a product, the description will be the name of
6493  * the product followed by the name of the gene in parentheses.
6494  */
GetGeneProtDescription(FeatureClausePtr fcp,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6495 static CharPtr GetGeneProtDescription
6496 ( FeatureClausePtr fcp,
6497   BioseqPtr        bsp,
6498   DeflineFeatureRequestListPtr rp)
6499 {
6500   SeqFeatPtr    sfp;
6501   CharPtr    protein_name;
6502   CharPtr    gene_name;
6503   size_t    description_length;
6504   CharPtr    description;
6505 
6506   if (fcp == NULL
6507     || fcp->featlist == NULL
6508     || fcp->featlist->data.ptrvalue == NULL)
6509   {
6510     return NULL;
6511   }
6512   sfp = fcp->featlist->data.ptrvalue;
6513 
6514   description_length = 0;
6515 
6516   if (fcp->feature_label_data.productname != NULL)
6517   {
6518     protein_name = StringSave (fcp->feature_label_data.productname);
6519   }
6520   else
6521   {
6522     protein_name = GetProductName (sfp, bsp, rp);
6523     if (protein_name == NULL && IsGene (sfp))
6524     {
6525 
6526     }
6527   }
6528   if (protein_name != NULL)
6529   {
6530     description_length += StringLen (protein_name);
6531   }
6532 
6533   gene_name = GetGeneName (fcp->grp, rp == NULL ? FALSE : rp->suppress_locus_tags);
6534   if (gene_name != NULL)
6535   {
6536     description_length += StringLen (gene_name);
6537     if (protein_name != NULL)
6538     {
6539       description_length += 3;
6540     }
6541   }
6542   description = (CharPtr) MemNew (description_length + 1);
6543   if (description == NULL) return NULL;
6544   if (protein_name != NULL)
6545   {
6546     if (gene_name != NULL)
6547     {
6548       sprintf (description, "%s (%s)", protein_name, gene_name);
6549     }
6550     else
6551     {
6552       sprintf (description, "%s", protein_name);
6553     }
6554   }
6555   else
6556   {
6557     if (gene_name != NULL)
6558       sprintf (description, "%s", gene_name);
6559   }
6560   if (protein_name != NULL) MemFree (protein_name);
6561   if (StringHasNoText (description)) {
6562     description = MemFree (description);
6563   }
6564   return description;
6565 }
6566 
6567 /* This array of match functions is used to identify, in order of preference,
6568  * the features that might be used to generate a product for a gene-protein
6569  * description if the feature has not already been grouped with a product
6570  * feature.
6571  */
6572 static matchFunction productfeatures[] = {
6573   IsCDS, IsmRNA, IstRNA
6574 };
6575 
6576 /* This function finds gene features without products and looks for
6577  * features that might provide products for them.
6578  */
FindGeneProducts(ValNodePtr clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6579 static void FindGeneProducts
6580 ( ValNodePtr clause_list,
6581   BioseqPtr  bsp,
6582   DeflineFeatureRequestListPtr rp)
6583 {
6584   ValNodePtr       vnp;
6585   FeatureClausePtr fcp, productfcp;
6586   Int4             i, NumProductFeatureTypes;
6587   Boolean          suppress_locus_tag = (rp == NULL ? FALSE : rp->suppress_locus_tags);
6588 
6589   NumProductFeatureTypes = sizeof (productfeatures) / sizeof (matchFunction);
6590 
6591   for (vnp = clause_list; vnp != NULL; vnp = vnp->next)
6592   {
6593     if (vnp->choice == DEFLINE_CLAUSEPLUS
6594       && (fcp = vnp->data.ptrvalue) != NULL
6595       && fcp->featlist != NULL)
6596     {
6597       if (fcp->featlist->choice == DEFLINE_FEATLIST
6598         && IsGene (fcp->featlist->data.ptrvalue)
6599         && fcp->feature_label_data.productname == NULL)
6600       {
6601         productfcp = NULL;
6602         for (i=0; i < NumProductFeatureTypes && productfcp == NULL; i++)
6603         {
6604           productfcp = FindProductInFeatureList (fcp, clause_list,
6605                                                  productfeatures[i],
6606                                                  suppress_locus_tag);
6607         }
6608         if (productfcp != NULL)
6609         {
6610           fcp->is_alt_spliced = productfcp->is_alt_spliced;
6611           if (productfcp->feature_label_data.productname != NULL)
6612           {
6613             fcp->feature_label_data.productname =
6614                   StringSave (productfcp->feature_label_data.productname);
6615           }
6616           else
6617           {
6618             fcp->feature_label_data.productname
6619                   = GetProductName (productfcp->featlist->data.ptrvalue,
6620                                     bsp, rp);
6621           }
6622           if (fcp->feature_label_data.description != NULL)
6623           {
6624             MemFree (fcp->feature_label_data.description);
6625             fcp->feature_label_data.description = NULL;
6626           }
6627           fcp->feature_label_data.description =
6628             GetGeneProtDescription (fcp, bsp, rp);
6629         }
6630       }
6631       else
6632       {
6633         FindGeneProducts (fcp->featlist, bsp, rp);
6634       }
6635     }
6636   }
6637 }
6638 
ShowInterval(SeqFeatPtr sfp)6639 static Boolean ShowInterval (
6640   SeqFeatPtr sfp
6641 )
6642 {
6643   if (IsSatelliteSequence (sfp) || IsExon (sfp) || IsIntron (sfp)
6644     || IsPromoter (sfp) || Is3UTR (sfp) || Is5UTR (sfp) || IsRepeatRegion(sfp))
6645     return FALSE;
6646   return TRUE;
6647 }
6648 
GetExonDescription(BioseqPtr bsp,SeqFeatPtr sfp)6649 static CharPtr GetExonDescription (
6650   BioseqPtr bsp,
6651   SeqFeatPtr sfp
6652 )
6653 {
6654   SeqMgrFeatContext  context;
6655   SeqFeatPtr new_sfp;
6656   CharPtr    label;
6657 
6658   if ((new_sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, bsp, 0, 0, sfp, &context)) != sfp
6659       || context.label == NULL)
6660   {
6661     if ((new_sfp = SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &context)) != sfp
6662       || context.label == NULL)
6663     {
6664       return NULL;
6665     }
6666   }
6667   if ((IsExon (sfp) && StringCmp (context.label, "exon") == 0)
6668     || (IsIntron (sfp) && StringCmp (context.label, "intron") == 0))
6669   {
6670     return NULL;
6671   }
6672 
6673   label = StringSave (context.label);
6674   return label;
6675 }
6676 
GetFeatureDescription(FeatureClausePtr fcp,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6677 static CharPtr GetFeatureDescription
6678 ( FeatureClausePtr fcp,
6679   BioseqPtr        bsp,
6680   DeflineFeatureRequestListPtr rp)
6681 {
6682   SeqFeatPtr    sfp;
6683 
6684   if ( fcp == NULL
6685     || fcp->featlist == NULL
6686     || fcp->featlist->data.ptrvalue == NULL)
6687   {
6688     return NULL;
6689   }
6690   sfp = fcp->featlist->data.ptrvalue;
6691   if (sfp == NULL) return NULL;
6692 
6693   if (IsExon (sfp) || IsIntron (sfp))
6694   {
6695     return GetExonDescription (bsp, sfp);
6696   }
6697   else if (IsEndogenousVirusSequence (sfp))
6698   {
6699     return GetEndogenousVirusSequenceDescription (sfp);
6700   }
6701   else if (IsEndogenousVirusSourceFeature (sfp))
6702   {
6703     return GetEndogenousVirusSourceFeatureDescription (sfp);
6704   }
6705   else if (IsControlRegion (sfp))
6706   {
6707     return NULL;
6708   }
6709   else if (IsDloop (sfp))
6710   {
6711     return NULL;
6712   }
6713   else if (Is3UTR (sfp))
6714   {
6715     return NULL;
6716   }
6717   else if (Is5UTR (sfp))
6718   {
6719     return NULL;
6720   }
6721   else if (IsLTR (sfp))
6722   {
6723     return GetLTRDescription (sfp);
6724   }
6725   else
6726   {
6727     return GetGeneProtDescription (fcp, bsp, rp);
6728   }
6729 }
6730 
GetSatelliteFeatureLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)6731 static void LIBCALLBACK GetSatelliteFeatureLabel (
6732   ValNodePtr      featlist,
6733   BioseqPtr       bsp,
6734   Uint1           biomol,
6735   FeatureLabelPtr flp
6736 )
6737 {
6738   SeqFeatPtr main_feat;
6739   CharPtr    semicolon, colon;
6740   GBQualPtr  qual;
6741   Boolean    found = FALSE;
6742 
6743   flp->description = NULL;
6744   flp->typeword = StringSave ("sequence");
6745   flp->pluralizable = FALSE;
6746   flp->is_typeword_first = FALSE;
6747 
6748   if (featlist == NULL) return;
6749   main_feat = featlist->data.ptrvalue;
6750   if (main_feat == NULL) return;
6751   for (qual = main_feat->qual; qual != NULL && !found; qual = qual->next)
6752   {
6753     if (StringCmp (qual->qual, "satellite") == 0)
6754     {
6755       flp->description = StringSave (qual->val);
6756       if ((semicolon = StringStr (flp->description, ";")) != NULL)
6757       {
6758         *semicolon = 0;
6759       }
6760       if ((colon = StringChr (flp->description, ':')) != NULL)
6761       {
6762         *colon = ' ';
6763       }
6764     }
6765   }
6766 }
6767 
GetPromoterFeatureLabel(ValNodePtr featlist,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp)6768 static void LIBCALLBACK GetPromoterFeatureLabel (
6769   ValNodePtr      featlist,
6770   BioseqPtr       bsp,
6771   Uint1           biomol,
6772   FeatureLabelPtr flp
6773 )
6774 {
6775   SeqFeatPtr main_feat;
6776 
6777   flp->description = NULL;
6778   flp->typeword = StringSave ("promoter region");
6779 
6780   if (featlist == NULL) return;
6781   main_feat = featlist->data.ptrvalue;
6782   if (main_feat == NULL) return;
6783 
6784   flp->description =  NULL;
6785   flp->pluralizable = FALSE;
6786   flp->is_typeword_first = FALSE;
6787 
6788 }
6789 
6790 /* This function temporarily removes a 3' UTR clause from the end of
6791  * a clause list so that it will not be included in the list of subfeatures
6792  * before a CDS in the definition line.
6793  * The 3' UTR clause should be put back if it was not the only clause in the
6794  * list.
6795  */
Remove3UTRFromEndOfFeatList(ValNodePtr PNTR featlist)6796 static ValNodePtr Remove3UTRFromEndOfFeatList (ValNodePtr PNTR featlist)
6797 {
6798   ValNodePtr vnp, prev = NULL;
6799 
6800   if (featlist == NULL || *featlist == NULL) return NULL;
6801 
6802   for (vnp = *featlist; vnp != NULL && vnp->next != NULL; vnp = vnp->next)
6803   {
6804     prev = vnp;
6805   }
6806   if (vnp->choice == DEFLINE_CLAUSEPLUS && Is3UTRClause (vnp->data.ptrvalue))
6807   {
6808     if (prev == NULL)
6809     {
6810       *featlist = NULL;
6811     }
6812     else
6813     {
6814       prev->next = NULL;
6815     }
6816   }
6817   else
6818   {
6819     vnp = NULL;
6820   }
6821   return vnp;
6822 }
6823 
6824 static Uint1 GetMoleculeType (BioseqPtr bsp, Uint2     entityID);
6825 static void ConsolidateClauses (
6826   ValNodePtr PNTR list,
6827   BioseqPtr  bsp,
6828   Uint1      biomol,
6829   Boolean    delete_now,
6830   DeflineFeatureRequestListPtr rp);
6831 
6832 
6833 /* This function calculates the "interval" for a clause in the definition
6834  * line.  The interval could be an empty string, it could indicate whether
6835  * the location of the feature is partial or complete and whether or not
6836  * the feature is a CDS, the interval could be a description of the
6837  * subfeatures of the clause, or the interval could be a combination of the
6838  * last two items if the feature is a CDS.
6839  */
GetGenericInterval(FeatureClausePtr fcp,Uint1 biomol,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)6840 static CharPtr GetGenericInterval
6841 ( FeatureClausePtr fcp,
6842   Uint1            biomol,
6843   BioseqPtr        bsp,
6844   DeflineFeatureRequestListPtr rp)
6845 {
6846   CharPtr    interval;
6847   Boolean    partial5, partial3;
6848   SeqFeatPtr sfp;
6849   ValNodePtr featlist, strings, prev_feat;
6850   CharPtr    subfeatlist;
6851   Int4       len;
6852   Boolean    suppress_final_and;
6853   ValNodePtr utr3vnp = NULL;
6854   ValNodePtr last_feat;
6855   Uint1      molecule_type;
6856 
6857   if ( fcp == NULL || fcp->featlist == NULL) return NULL;
6858   if (fcp->is_unknown) return NULL;
6859   featlist = fcp->featlist;
6860   sfp = featlist->data.ptrvalue;
6861   if (sfp == NULL) return NULL;
6862   if ( IsExon (sfp) && fcp->is_alt_spliced)
6863   {
6864     return StringSave ("alternatively spliced");
6865   }
6866   if ( ! ShowInterval (sfp)) return NULL;
6867 
6868   if (IsIntergenicSpacer (sfp) && StringNCmp (sfp->comment, "may contain ", 12) == 0) {
6869     return StringSave ("region");
6870   }
6871 
6872   subfeatlist = NULL;
6873   len = 50;
6874   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
6875 
6876   strings = NULL;
6877   prev_feat = NULL;
6878   while (featlist != NULL && featlist->choice != DEFLINE_CLAUSEPLUS)
6879   {
6880     prev_feat = featlist;
6881     featlist = featlist->next;
6882   }
6883   if (IsCDS (sfp))
6884   {
6885     utr3vnp = Remove3UTRFromEndOfFeatList (&featlist);
6886   }
6887   if (featlist != NULL)
6888   {
6889     suppress_final_and = FALSE;
6890     if (( IsCDS (sfp) && ! fcp->clause_info_only)
6891         || utr3vnp != NULL)
6892     {
6893       suppress_final_and = TRUE;
6894     }
6895     LabelClauses (featlist, biomol, bsp, rp);
6896 
6897     molecule_type = GetMoleculeType (bsp, bsp->idx.entityID);
6898     /* consolidate genes/proteins with the same names (usually hypothetical proteins) */
6899     ConsolidateClauses (&featlist, bsp, molecule_type, TRUE,
6900                         rp);
6901 
6902     /* make sure featlist is still intact - may have consolidated it */
6903     if (prev_feat == NULL)
6904     {
6905       fcp->featlist = featlist;
6906     }
6907     else
6908     {
6909       prev_feat->next = featlist;
6910     }
6911 
6912     ListClauses (featlist, &strings, FALSE, suppress_final_and, rp->suppress_allele);
6913     subfeatlist = MergeValNodeStrings (strings, FALSE);
6914       ValNodeFreeData (strings);
6915     len += StringLen (subfeatlist) + 7;
6916 
6917     if (utr3vnp != NULL)
6918     {
6919       len += 14;
6920     }
6921   }
6922 
6923   interval = (CharPtr) MemNew (len * sizeof (Char));
6924   if (interval == NULL) return NULL;
6925   interval[0] = 0;
6926 
6927   if (StringDoesHaveText (subfeatlist))
6928   {
6929     StringCat (interval, subfeatlist);
6930     if ( ! IsCDS (sfp) || fcp->clause_info_only)
6931     {
6932       if (utr3vnp != NULL)
6933       {
6934         if (featlist != NULL && featlist->next != NULL)
6935         {
6936           StringCat (interval, ",");
6937         }
6938         StringCat (interval, " and 3' UTR");
6939         /* put 3' UTR back at end of featlist */
6940         if (featlist != NULL)
6941         {
6942           last_feat = featlist;
6943           while (last_feat != NULL && last_feat->next != NULL)
6944           {
6945             last_feat = last_feat->next;
6946           }
6947           last_feat->next = utr3vnp;
6948         }
6949       }
6950       if (subfeatlist != NULL) MemFree (subfeatlist);
6951       return interval;
6952     }
6953     if (utr3vnp == NULL)
6954     {
6955       StringCat (interval, " and ");
6956     }
6957     else
6958     {
6959       StringCat (interval, ", ");
6960     }
6961   }
6962 
6963   if (FeatureDoesNotGetPartialComplete (sfp))
6964   {
6965     /* don't add partial or complete */
6966   }
6967   else if (partial5 || partial3)
6968   {
6969     StringCat (interval, "partial ");
6970   }
6971   else
6972   {
6973     StringCat (interval, "complete ");
6974   }
6975   if (IsCDS (sfp) && ! IsPseudo (sfp))
6976   {
6977     StringCat (interval, "cds");
6978     if (fcp->is_alt_spliced)
6979       StringCat (interval, ", alternatively spliced");
6980   }
6981   else
6982   {
6983     StringCat (interval, "sequence");
6984     if (IsNoncodingProductFeat (sfp) && fcp->is_alt_spliced)
6985     {
6986       StringCat (interval, ", alternatively spliced");
6987     }
6988   }
6989 
6990   if (utr3vnp != NULL)
6991   {
6992     /* tack UTR3 on at end of clause */
6993     if (StringDoesHaveText (subfeatlist))
6994     {
6995       StringCat (interval, ",");
6996     }
6997     StringCat (interval, " and 3' UTR");
6998 
6999     /* put 3' UTR back at end of featlist */
7000     if (featlist != NULL)
7001     {
7002       last_feat = featlist;
7003       while (last_feat != NULL && last_feat->next != NULL)
7004       {
7005         last_feat = last_feat->next;
7006       }
7007       last_feat->next = utr3vnp;
7008     }
7009   }
7010 
7011   if (subfeatlist != NULL) MemFree (subfeatlist);
7012 
7013   return interval;
7014 }
7015 
7016 
7017 /* This function is used to generate feature label information for
7018  * a feature clause.  It is called by the LabelFeature function if
7019  * a "GetFeatureLabel" function is not found for the specific feature
7020  * type.
7021  * In the future it may be advisable to create "GetFeatureLabel" functions
7022  * for more of the specific feature types, to reduce the number of times
7023  * that the feature must be identified as being a certain type.
7024  */
GetGenericFeatureLabel(FeatureClausePtr fcp,BioseqPtr bsp,Uint1 biomol,FeatureLabelPtr flp,DeflineFeatureRequestListPtr rp)7025 static void LIBCALLBACK GetGenericFeatureLabel
7026 ( FeatureClausePtr fcp,
7027   BioseqPtr        bsp,
7028   Uint1            biomol,
7029   FeatureLabelPtr  flp,
7030   DeflineFeatureRequestListPtr rp)
7031 {
7032   SeqFeatPtr main_feat;
7033 
7034   if (fcp == NULL
7035     || fcp->featlist == NULL
7036     || fcp->featlist->data.ptrvalue == NULL)
7037   {
7038     return;
7039   }
7040   main_feat = fcp->featlist->data.ptrvalue;
7041   if (main_feat == NULL) return;
7042 
7043   if (flp->typeword == NULL)
7044   {
7045     flp->typeword = GetFeatureTypeWord (biomol, main_feat);
7046     flp->is_typeword_first = IsTypeWordFirst (flp->typeword);
7047     flp->pluralizable = TRUE;
7048   }
7049   if (flp->productname == NULL)
7050   {
7051     flp->productname = GetProductName (main_feat, bsp, rp);
7052   }
7053   if (flp->description == NULL
7054     && (! IsMiscRNA (main_feat)
7055       || StringStr (flp->productname, "spacer") == NULL ))
7056   {
7057     flp->description = GetFeatureDescription (fcp, bsp, rp);
7058   }
7059 
7060 }
7061 
7062 typedef void (LIBCALLBACK *GetFeatureLabelFunction) (
7063   ValNodePtr      featlist,
7064   BioseqPtr       bsp,
7065   Uint1           biomol,
7066   FeatureLabelPtr flp
7067 );
7068 
7069 typedef struct matchlabelfunction {
7070   matchFunction           itemmatch;
7071   GetFeatureLabelFunction labelfunction;
7072 } MatchLabelFunctionData, PNTR MatchLabelFunctionPtr;
7073 
7074 static MatchLabelFunctionData label_functions[] = {
7075  { IsSatelliteSequence, GetSatelliteFeatureLabel         },
7076  { IsMobileElement,     GetMobileElementFeatureLabel        },
7077  { IsPromoter,          GetPromoterFeatureLabel          },
7078  { IsIntergenicSpacer,  GetIntergenicSpacerFeatureLabel  },
7079  { IsGeneCluster,       GetGeneClusterFeatureLabel       },
7080  { IsRepeatRegion,      GetRepeatRegionLabel             }
7081 };
7082 
7083 typedef enum {
7084  DEFLINE_FEATLABEL_Satellite = 0,
7085  DEFLINE_FEATLABEL_Transposon,
7086  DEFLINE_FEATLABEL_Promoter,
7087  DEFLINE_FEATLABEL_IntergenicSpacer,
7088  DEFLINE_FEATLABEL_GeneCluster,
7089  DEFLINE_FEATLABEL_RepeatRegion,
7090  NumDefLineFeatLabels
7091 } DefLineFeatLabel;
7092 
/* Fills in the allele name, the interval (when not already set) and the
 * feature label data of a clause.  Only clauses whose first featlist
 * node is a DEFLINE_FEATLIST node are processed.
 * The label comes from the first entry of label_functions whose
 * predicate accepts the main feature, or from GetGenericFeatureLabel
 * when no entry matches.
 * rp may be NULL, in which case locus tags are not suppressed when the
 * allele name is looked up.
 * NOTE(review): allelename is assigned unconditionally; if a previous
 * value can be present it is not freed here - confirm ownership.
 */
static void LabelFeature
( BioseqPtr        bsp,
  Uint1            biomol,
  FeatureClausePtr new_clauseplus,
  DeflineFeatureRequestListPtr rp)
{
  Int4             idx;
  SeqFeatPtr       feat;

  if (new_clauseplus == NULL || new_clauseplus->featlist == NULL) return;
  if (new_clauseplus->featlist->choice != DEFLINE_FEATLIST) return;

  feat = (SeqFeatPtr) new_clauseplus->featlist->data.ptrvalue;

  new_clauseplus->allelename = GetAlleleName (new_clauseplus->grp,
                                              rp == NULL ? FALSE : rp->suppress_locus_tags);

  if (new_clauseplus->interval == NULL)
  {
    new_clauseplus->interval =
                GetGenericInterval (new_clauseplus, biomol, bsp, rp);
  }

  /* type-specific labels take precedence over the generic one */
  for (idx = 0; idx < NumDefLineFeatLabels; idx++)
  {
    if (! label_functions [idx].itemmatch (feat)) continue;

    label_functions [idx].labelfunction (new_clauseplus->featlist,
                                         bsp, biomol,
                                         &new_clauseplus->feature_label_data);
    return;
  }

  GetGenericFeatureLabel (new_clauseplus, bsp, biomol,
                          &new_clauseplus->feature_label_data, rp);
}
7132 
7133 /* This function is used to calculate the parts of a product name that
7134  * are "the same" for use as the name of an alternatively spliced product.
7135  * The common portion of the string must end at a recognized separator,
7136  * such as a space, comma, or dash instead of in the middle of a word.
7137  * The matching portions of the string could occur at the beginning or end
7138  * of the string, or even occasionally at the beginning and end of a
7139  * string, but not as the center of the string with a different beginning
7140  * and ending.
7141  */
FindStringIntersection(CharPtr str1,CharPtr str2,Boolean str1_previously_stripped)7142 static CharPtr FindStringIntersection (
7143   CharPtr str1,
7144   CharPtr str2,
7145   Boolean str1_previously_stripped
7146 )
7147 {
7148   Int4 matchleftlen = 0;
7149   Int4 matchlefttoken = 0;
7150   Int4 matchrightidx1 = 0;
7151   Int4 matchrightidx2 = 0;
7152   Int4 matchrighttoken = 0;
7153   CharPtr match_string;
7154   Int4 len1;
7155   Int4 len2;
7156   Int4 match_len;
7157 
7158   if (str1 == NULL || str2 == NULL) return NULL;
7159   if (StringCmp (str1, str2) == 0) return StringSave (str1);
7160   len1 = StringLen (str1);
7161   len2 = StringLen (str2);
7162 
7163   while (str1[matchleftlen] != 0 && str2[matchleftlen] != 0
7164          && str1[matchleftlen] == str2[matchleftlen])
7165   {
7166     if (str1 [matchleftlen] == ','
7167       || str1 [matchleftlen] == '-')
7168     {
7169       matchlefttoken = matchleftlen;
7170     }
7171     else if (str1 [matchleftlen] == ' '
7172       && matchlefttoken != matchleftlen - 1)
7173     {
7174       matchlefttoken = matchleftlen;
7175     }
7176     matchleftlen++;
7177   }
7178   if (matchleftlen == len1 && str1_previously_stripped)
7179   {
7180     matchlefttoken = matchleftlen;
7181   }
7182   else
7183   {
7184     matchleftlen = matchlefttoken;
7185   }
7186 
7187   matchrightidx1 = len1;
7188   matchrightidx2 = len2;
7189 
7190   while (matchrightidx1 > -1 && matchrightidx2 > -1
7191          && str1[matchrightidx1] == str2[matchrightidx2])
7192   {
7193     if (str1 [matchrightidx1] == ' '
7194       || str1[matchrightidx1] == ','
7195       || str1[matchrightidx1] == '-')
7196     {
7197       matchrighttoken = matchrightidx1;
7198     }
7199     matchrightidx1--;
7200     matchrightidx2--;
7201   }
7202   if (matchrightidx1 == -1)
7203   {
7204     matchrighttoken = matchrightidx1;
7205   }
7206   else if (matchrighttoken > 0)
7207   {
7208     matchrightidx1 = matchrighttoken;
7209   }
7210   else if (str1_previously_stripped && matchrightidx1 < len1 - 1)
7211   {
7212     /* matchrightidx1 = matchrighttoken; */
7213     /* do nothing, leave right index where it is */
7214   }
7215   else
7216   {
7217     matchrightidx1 = len1;
7218   }
7219 
7220   match_len = matchleftlen;
7221   if (matchrightidx1 < len1 - 1)
7222   {
7223     match_len += len1 - matchrightidx1 - 1;
7224   }
7225 
7226   if (match_len <= 0) return NULL;
7227 
7228   match_string = MemNew (match_len + 2);
7229   if (match_string == NULL) return NULL;
7230   if (matchleftlen != 0)
7231   {
7232     StringNCpy (match_string, str1, matchleftlen);
7233     match_string[matchleftlen] = 0;
7234   }
7235   else
7236   {
7237     match_string[0] = 0;
7238   }
7239   if (matchrightidx1 < len1)
7240   {
7241     if (match_string[0] != 0) StringCat (match_string, " ");
7242     StringCat (match_string, str1 + matchrightidx1 + 1);
7243   }
7244   return match_string;
7245 }
7246 
7247 /* These are the words that are used to introduced the part of the protein
7248  * name that differs in alt-spliced products - they should not be part of
7249  * the alt-spliced product name.
7250  * Note that splice variant is listed before "variant" so that it will be
7251  * found first and "variant" will not be removed from "splice variant", leaving
7252  * splice as an orphan.
7253  */
7254 
7255 static CharPtr UnwantedWords [] = {
7256  "splice variant",
7257  "splice product",
7258  "variant",
7259  "isoform"
7260 };
7261 
/* Removes, in place, the first occurrence of each entry of UnwantedWords
 * from productname.  The entries are processed in table order.
 *   - At the start of the string, the word itself is removed.
 *   - In the middle, the word and the single character in front of it
 *     are removed.
 *   - At the end, the string is truncated where the word begins.
 * Surrounding spaces are otherwise left for the caller to trim.
 */
static void TrimUnwantedWordsFromAltSpliceProductName (
  CharPtr productname
)
{
  Int4    word_idx, word_count;
  size_t  word_len, offset;
  CharPtr hit, remainder;

  word_count = sizeof (UnwantedWords) / sizeof (CharPtr);
  for (word_idx = 0; word_idx < word_count; word_idx++)
  {
    hit = StringStr (productname, UnwantedWords [word_idx]);
    if (hit == NULL) continue;

    word_len = StringLen (UnwantedWords [word_idx]);
    offset = hit - productname;

    if (offset != 0 && offset >= StringLen (productname) - word_len)
    {
      /* word occurs at end of phrase */
      *hit = 0;
      continue;
    }

    /* Word is at the beginning or in the middle: close the gap by
     * copying the text that follows it.  The copy goes through a
     * temporary because source and destination overlap.
     */
    remainder = StringSave (hit + word_len);
    if (offset == 0)
    {
      StringCpy (productname, remainder);
    }
    else
    {
      StringCpy (hit - 1, remainder);
    }
    MemFree (remainder);
  }
}
7300 
7301 
PreviouslyStripped(SeqFeatPtr cds,BioseqPtr bsp,CharPtr productname)7302 static Boolean PreviouslyStripped (SeqFeatPtr cds, BioseqPtr bsp, CharPtr productname)
7303 {
7304   CharPtr expected_product_name;
7305   Boolean rval = FALSE;
7306 
7307   if (cds == NULL || StringHasNoText (productname)) return FALSE;
7308   expected_product_name = GetProductName (cds, bsp, FALSE);
7309   if (StringCmp (productname, expected_product_name) != 0) {
7310     rval = TRUE;
7311   }
7312   expected_product_name = MemFree (expected_product_name);
7313   return rval;
7314 }
7315 
7316 /* This function determines whether two CDSs meet the conditions for
7317  * alternative splicing, and if so, it returns the name of the alternatively
7318  * spliced product.  In order to be alternatively spliced, the two CDSs
7319  * must have the same gene, must share a complete interval, and must have
7320  * similarly named products.
7321  */
MeetAltSpliceRules(FeatureClausePtr cdsfcp1,FeatureClausePtr cdsfcp2,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7322 static CharPtr MeetAltSpliceRules
7323 ( FeatureClausePtr cdsfcp1,
7324   FeatureClausePtr cdsfcp2,
7325   BioseqPtr        bsp,
7326   DeflineFeatureRequestListPtr rp)
7327 {
7328   SeqFeatPtr cds1, cds2;
7329   CharPtr match_string;
7330   Int4    res;
7331 
7332   if (cdsfcp1 == NULL || cdsfcp2 == NULL
7333     || cdsfcp1->featlist == NULL || cdsfcp2->featlist == NULL)
7334   {
7335     return NULL;
7336   }
7337 
7338   cds1 = cdsfcp1->featlist->data.ptrvalue;
7339   cds2 = cdsfcp2->featlist->data.ptrvalue;
7340   if (! DoGenesMatch (cdsfcp1->grp, cdsfcp2->grp, rp == NULL ? FALSE : rp->suppress_locus_tags))
7341     return NULL;
7342 
7343   if ( (res = TestFeatOverlap (cds1, cds2, COMMON_INTERVAL)) != -1)
7344   {
7345     match_string = FindStringIntersection (
7346                      cdsfcp1->feature_label_data.productname,
7347                      cdsfcp2->feature_label_data.productname,
7348                      PreviouslyStripped(cds1, bsp, cdsfcp1->feature_label_data.productname));
7349     return match_string;
7350   }
7351   return NULL;
7352 }
7353 
7354 /* This function is used by the FindAltSplices function to locate the
7355  * next CDS in a list of feature clauses.
7356  */
FindNextCDSClause(ValNodePtr vnp)7357 static ValNodePtr FindNextCDSClause (ValNodePtr vnp)
7358 {
7359   FeatureClausePtr fcp;
7360 
7361   while (vnp != NULL)
7362   {
7363     if (vnp->choice == DEFLINE_CLAUSEPLUS)
7364     {
7365       fcp = vnp->data.ptrvalue;
7366       if (fcp != NULL && !fcp->delete_me && fcp->featlist != NULL
7367         && IsCDS (fcp->featlist->data.ptrvalue))
7368       {
7369         return vnp;
7370       }
7371     }
7372     vnp = vnp->next;
7373   }
7374   return NULL;
7375 }
7376 
7377 /* This function is used by the FindAltSplices function to move the features
7378  * and subclauses from the second CDS in an alternatively spliced pair of
7379  * CDSs to the feature clause for the first CDS, so that the subfeatures
7380  * can be properly listed.
7381  */
MoveSubclauses(FeatureClausePtr dstfcp,FeatureClausePtr srcfcp)7382 static void MoveSubclauses (
7383   FeatureClausePtr dstfcp,
7384   FeatureClausePtr srcfcp
7385 )
7386 {
7387   ValNodePtr dst_last_feat, dst_first_clause, dst_last_clause;
7388   ValNodePtr src_last_feat, src_first_clause;
7389 
7390   if (dstfcp == NULL || srcfcp == NULL || srcfcp->featlist == NULL) return;
7391 
7392   dst_first_clause = NULL;
7393   dst_last_clause = NULL;
7394   src_first_clause = NULL;
7395 
7396   dst_last_feat = dstfcp->featlist;
7397   while (dst_last_feat != NULL
7398       && dst_last_feat->next != NULL
7399       && dst_last_feat->next->choice == DEFLINE_FEATLIST)
7400   {
7401     dst_last_feat = dst_last_feat->next;
7402   }
7403   if (dst_last_feat != NULL)
7404   {
7405     dst_first_clause = dst_last_feat->next;
7406   }
7407   dst_last_clause = dst_first_clause;
7408   while (dst_last_clause != NULL && dst_last_clause->next != NULL)
7409   {
7410     dst_last_clause = dst_last_clause->next;
7411   }
7412 
7413   src_last_feat = srcfcp->featlist;
7414   while (src_last_feat != NULL
7415       && src_last_feat->next != NULL
7416       && src_last_feat->next->choice == DEFLINE_FEATLIST)
7417   {
7418     src_last_feat = src_last_feat->next;
7419   }
7420   if (src_last_feat != NULL)
7421   {
7422     src_first_clause = src_last_feat->next;
7423   }
7424 
7425   /* insert features before clauses */
7426   if (dst_last_feat == NULL)
7427   {
7428     dstfcp->featlist = srcfcp->featlist;
7429     dst_last_feat = src_last_feat;
7430   }
7431   else
7432   {
7433     dst_last_feat->next = srcfcp->featlist;
7434   }
7435   /* insert clauses after feats */
7436   if (dst_first_clause != NULL)
7437   {
7438     src_last_feat->next = dst_first_clause;
7439     dst_last_clause->next = src_first_clause;
7440   }
7441   srcfcp->featlist = NULL;
7442 }
7443 
7444 /* we want to look through the list for CDS features */
7445 /* if we find two CDSs that are alternatively spliced, */
7446 /* we replace the first alternatively spliced CDS feature */
7447 /* with a new CDS feature that has the new protein name as */
7448 /* a comment and a data.choice value that indicates alt splicing */
7449 /* we remove the second alternatively spliced CDS feature from the list */
7450 
FindAltSplices(ValNodePtr clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7451 static void FindAltSplices
7452 ( ValNodePtr clause_list,
7453   BioseqPtr  bsp,
7454   DeflineFeatureRequestListPtr rp)
7455 {
7456   FeatureClausePtr  fcp1, fcp2;
7457   ValNodePtr cdsclause1, cdsclause2;
7458   ValNodePtr searchclause;
7459   CharPtr  combined_protein_name;
7460   Boolean    partial3_1, partial5_1, partial3_2, partial5_2;
7461   Int4       left1, left2, right1, right2;
7462 
7463   if (clause_list == NULL) return;
7464 
7465   cdsclause1 = FindNextCDSClause (clause_list);
7466   while (cdsclause1 != NULL)
7467   {
7468     fcp1 = (FeatureClausePtr) cdsclause1->data.ptrvalue;
7469     if (fcp1->feature_label_data.productname == NULL)
7470     {
7471       fcp1->feature_label_data.productname =
7472            GetProductName (fcp1->featlist->data.ptrvalue, bsp, rp);
7473     }
7474     searchclause = cdsclause1->next;
7475     cdsclause2 = FindNextCDSClause (searchclause);
7476     while (cdsclause2 != NULL)
7477     {
7478       fcp2 = (FeatureClausePtr) cdsclause2->data.ptrvalue;
7479       if (fcp2->feature_label_data.productname == NULL)
7480       {
7481         fcp2->feature_label_data.productname =
7482            GetProductName (fcp2->featlist->data.ptrvalue, bsp, rp);
7483       }
7484       combined_protein_name = MeetAltSpliceRules (fcp1, fcp2, bsp, rp);
7485       if (combined_protein_name != NULL)
7486       {
7487         /* get rid of variant, splice variant, splice product, isoform, etc.*/
7488         TrimUnwantedWordsFromAltSpliceProductName (combined_protein_name);
7489 
7490         /* get rid of trailing spaces in protein name */
7491         TrimSpacesAroundString (combined_protein_name);
7492 
7493         /* copy new protein name into first clause */
7494         MemFree (fcp1->feature_label_data.productname);
7495         fcp1->feature_label_data.productname = combined_protein_name;
7496         CheckSeqLocForPartial (fcp1->slp, &partial5_1, &partial3_1);
7497         left1 = GetOffsetInBioseq (fcp1->slp, bsp, SEQLOC_LEFT_END);
7498         right1 = GetOffsetInBioseq (fcp1->slp, bsp, SEQLOC_RIGHT_END);
7499         CheckSeqLocForPartial (fcp2->slp, &partial5_2, &partial3_2);
7500         left2 = GetOffsetInBioseq (fcp2->slp, bsp, SEQLOC_LEFT_END);
7501         right2 = GetOffsetInBioseq (fcp2->slp, bsp, SEQLOC_RIGHT_END);
7502         fcp1->slp = SeqLocMerge (bsp, fcp1->slp, fcp2->slp,
7503                                  FALSE, TRUE, FALSE);
7504         if (left1 == left2)
7505         {
7506           partial5_1 |= partial5_2;
7507         }
7508         else
7509         {
7510           partial5_1 = left1 < left2 ? partial5_1 : partial5_2;
7511         }
7512         if (right1 == right2)
7513         {
7514           partial3_1 |= partial3_2;
7515         }
7516         else
7517         {
7518           partial3_1 = right1 > right2 ? partial3_1 : partial3_2;
7519         }
7520         SetSeqLocPartial (fcp1->slp, partial5_1, partial3_1);
7521         fcp1->is_alt_spliced = TRUE;
7522 
7523         /* copy over fcp2 subclauses */
7524         MoveSubclauses (fcp1, fcp2);
7525 
7526         /* remove second clause */
7527         fcp2->delete_me = TRUE;
7528       }
7529       searchclause = cdsclause2->next;
7530       cdsclause2 = FindNextCDSClause (searchclause);
7531     }
7532     cdsclause1 = FindNextCDSClause (cdsclause1->next);
7533   }
7534   DeleteFeatureClauses (&clause_list);
7535 }
7536 
/* Calls LabelFeature on the data pointer of every node in clause_list. */
static void LabelClauses
( ValNodePtr clause_list,
  Uint1      biomol,
  BioseqPtr  bsp,
  DeflineFeatureRequestListPtr rp)
{
  ValNodePtr node;

  for (node = clause_list; node != NULL; node = node->next)
  {
    LabelFeature (bsp, biomol, node->data.ptrvalue, rp);
  }
}
7552 
7553 static CharPtr misc_words [] = {
7554   "internal transcribed spacer",
7555   "external transcribed spacer",
7556   "ribosomal RNA intergenic spacer",
7557   "ribosomal RNA",
7558   "intergenic spacer region",
7559   "intergenic spacer"
7560 };
7561 
7562 typedef enum {
7563   MISC_RNA_WORD_INTERNAL_SPACER = 0,
7564   MISC_RNA_WORD_EXTERNAL_SPACER,
7565   MISC_RNA_WORD_RNA_INTERGENIC_SPACER,
7566   MISC_RNA_WORD_RNA,
7567   MISC_RNA_WORD_INTERGENIC_SPACER_REGION,
7568   MISC_RNA_WORD_INTERGENIC_SPACER,
7569   NUM_MISC_RNA_WORDS
7570 } MiscWord;
7571 
7572 /* note - must put substrings of other separators after the longer version */
7573 static CharPtr separators [] = {
7574   ", and ",
7575   " and ",
7576   ", ",
7577   "; "
7578 };
7579 
7580 #define num_separators 3
7581 
7582 
TokenListFromMiscRNAString(CharPtr str)7583 static ValNodePtr TokenListFromMiscRNAString (CharPtr str)
7584 {
7585   ValNodePtr token_list = NULL;
7586   CharPtr cansep [num_separators];
7587   CharPtr token_start, next_sep, token;
7588   Int4    i, sep_len, datalen;
7589   Uint1   word_i;
7590   Boolean found_unparseable = FALSE;
7591 
7592   if ( StringStr (str, "spacer") == NULL) {
7593     return NULL;
7594   }
7595 
7596   token_start = str;
7597   for (i = 0; i < num_separators; i++) {
7598     cansep[i] = StringStr (token_start, separators[i]);
7599   }
7600 
7601   while (*token_start != 0 && !found_unparseable) {
7602     next_sep = NULL;
7603     sep_len = 0;
7604     for (i = 0; i < num_separators; i++) {
7605       if (cansep[i] != NULL) {
7606         if (cansep[i] < token_start) {
7607           cansep[i] = StringStr (token_start, separators[i]);
7608         }
7609       }
7610       if (cansep[i] != NULL && (next_sep == NULL || next_sep > cansep[i])) {
7611         next_sep = cansep[i];
7612         sep_len = StringLen (separators[i]);
7613       }
7614     }
7615     if (next_sep == NULL) {
7616       token = StringSave (token_start);
7617       datalen = StringLen (token);
7618     } else {
7619       datalen = next_sep - token_start;
7620       token = (CharPtr) MemNew (sizeof (Char) * (datalen + 1));
7621       StringNCpy (token, token_start, datalen);
7622       token[datalen] = 0;
7623     }
7624     /* determine which word is part of the token */
7625     for (word_i=0;
7626          word_i < NUM_MISC_RNA_WORDS
7627            && StringStr (token, misc_words [word_i]) == NULL;
7628          word_i++) {}
7629     if (word_i < NUM_MISC_RNA_WORDS) {
7630       ValNodeAddPointer (&token_list, word_i, token);
7631     } else {
7632       found_unparseable = TRUE;
7633     }
7634     token_start += datalen + sep_len;
7635   }
7636   if (found_unparseable) {
7637     token_list = ValNodeFreeData (token_list);
7638   }
7639   return token_list;
7640 }
7641 
7642 
7643 static ValNodePtr
GetFeatureClausesFromMiscRNATokens(ValNodePtr token_list,SeqFeatPtr misc_rna,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7644 GetFeatureClausesFromMiscRNATokens
7645 ( ValNodePtr token_list,
7646   SeqFeatPtr misc_rna,
7647   BioseqPtr  bsp,
7648   DeflineFeatureRequestListPtr rp)
7649 {
7650   ValNodePtr clause_list = NULL;
7651   ValNodePtr vnp;
7652   Boolean    partial5, partial3, unparseable = FALSE;
7653   CharPtr    word_loc;
7654   FeatureClausePtr fcp;
7655 
7656   if (token_list == NULL || misc_rna == NULL) {
7657     return NULL;
7658   }
7659 
7660   CheckSeqLocForPartial (misc_rna->location, &partial5, &partial3);
7661 
7662   for (vnp = token_list; vnp != NULL && !unparseable; vnp = vnp->next) {
7663     word_loc = StringStr (vnp->data.ptrvalue, misc_words [vnp->choice]);
7664     if (word_loc == NULL) {
7665       unparseable = TRUE;
7666     } else {
7667       fcp = NewFeatureClause ( misc_rna, bsp, rp);
7668       if (fcp == NULL) {
7669         unparseable = TRUE;
7670       } else {
7671         if (vnp->choice == MISC_RNA_WORD_INTERNAL_SPACER
7672             || vnp->choice == MISC_RNA_WORD_EXTERNAL_SPACER
7673             || vnp->choice == MISC_RNA_WORD_RNA_INTERGENIC_SPACER
7674             || vnp->choice == MISC_RNA_WORD_INTERGENIC_SPACER
7675             || vnp->choice == MISC_RNA_WORD_INTERGENIC_SPACER_REGION) {
7676           if (word_loc == vnp->data.ptrvalue) {
7677             fcp->feature_label_data.is_typeword_first = TRUE;
7678             fcp->feature_label_data.typeword = StringSave (misc_words [vnp->choice]);
7679             if (StringLen (misc_words [vnp->choice]) + 1 < StringLen (vnp->data.ptrvalue)) {
7680               fcp->feature_label_data.description =
7681                     StringSave ( ((CharPtr)vnp->data.ptrvalue) + StringLen (misc_words [vnp->choice]) + 1);
7682             }
7683           } else {
7684             fcp->feature_label_data.is_typeword_first = FALSE;
7685             fcp->feature_label_data.typeword = StringSave (misc_words [vnp->choice]);
7686             if (StringLen (misc_words [vnp->choice]) + 1 < StringLen (vnp->data.ptrvalue)) {
7687               fcp->feature_label_data.description = StringSave ( vnp->data.ptrvalue);
7688               fcp->feature_label_data.description [word_loc - ((CharPtr) vnp->data.ptrvalue) - 1] = 0;
7689             }
7690           }
7691         } else if (vnp->choice == MISC_RNA_WORD_RNA) {
7692           fcp->feature_label_data.description = StringSave (vnp->data.ptrvalue);
7693           fcp->feature_label_data.is_typeword_first = FALSE;
7694           fcp->feature_label_data.typeword = StringSave ("gene");
7695         }
7696         if ((vnp == token_list && partial5) || (vnp->next == NULL && partial3)) {
7697           fcp->interval = StringSave ("partial sequence");
7698         } else {
7699           fcp->interval = StringSave ("complete sequence");
7700         }
7701         ValNodeAddPointer (&clause_list, DEFLINE_CLAUSEPLUS, fcp);
7702       }
7703     }
7704   }
7705   if (unparseable) {
7706     DefLineFeatClauseListFree (clause_list);
7707     clause_list = NULL;
7708   }
7709   return clause_list;
7710 }
7711 
7712 
s_ItemNeedsGene(CharPtr item_str)7713 static Boolean s_ItemNeedsGene (CharPtr item_str)
7714 {
7715   Int4 item_len;
7716 
7717   item_len = StringLen (item_str);
7718   if (StringSearch (item_str, "RNA") != NULL
7719       && !((item_len > 4 && StringCmp (item_str + item_len - 4, "gene") == 0)
7720       || (item_len > 5 && StringCmp (item_str + item_len - 5, "genes") == 0))) {
7721     return TRUE;
7722   } else {
7723     return FALSE;
7724   }
7725 }
7726 
GetRegionDescription(ValNodePtr token_list,SeqFeatPtr misc_rna,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7727 static CharPtr GetRegionDescription
7728 (ValNodePtr token_list,
7729  SeqFeatPtr misc_rna,
7730  BioseqPtr bsp,
7731  DeflineFeatureRequestListPtr rp)
7732 {
7733   ValNodePtr vnp;
7734   CharPtr desc, item_str;
7735   Int4    len = 5, item_len;
7736 
7737   if (token_list == NULL) {
7738     return NULL;
7739   }
7740 
7741   for (vnp = token_list; vnp != NULL; vnp = vnp->next) {
7742     item_str = (CharPtr) vnp->data.ptrvalue;
7743     item_len = StringLen (item_str);
7744     len += item_len + 2;
7745     if (s_ItemNeedsGene(item_str)) {
7746       len += 5;
7747     }
7748   }
7749 
7750   desc = (CharPtr) MemNew (sizeof (Char) * (len));
7751   desc[0] = 0;
7752   for (vnp = token_list; vnp != NULL; vnp = vnp->next) {
7753     StringCat (desc, vnp->data.ptrvalue);
7754     if (s_ItemNeedsGene(vnp->data.ptrvalue)) {
7755       StringCat (desc, " gene");
7756     }
7757     if (vnp->next != NULL) {
7758       StringCat (desc, ", ");
7759       if (vnp->next->next == NULL) {
7760         StringCat (desc, "and ");
7761       }
7762     }
7763   }
7764 
7765   return desc;
7766 }
7767 
7768 
7769 /* Some misc_RNA clauses have a comment that actually lists multiple
7770  * features.  This function creates a clause for each element in the
7771  * comment and inserts the list of new clauses into the feature list
7772  * at the point where the single previous clause was.
7773  */
GetMiscRNAelements(SeqFeatPtr misc_rna,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7774 static ValNodePtr GetMiscRNAelements
7775 ( SeqFeatPtr misc_rna,
7776   BioseqPtr  bsp,
7777   DeflineFeatureRequestListPtr rp)
7778 {
7779   CharPtr buffer;
7780   ValNodePtr token_list, clause_list = NULL;
7781   FeatureClausePtr fcp;
7782 
7783   if (misc_rna == NULL) return NULL;
7784   buffer = GetProductName (misc_rna, bsp, rp);
7785   if (buffer == NULL)
7786   {
7787     buffer = StringSave (misc_rna->comment);
7788   }
7789   else if (StringNCmp (buffer, misc_rna->comment, StringLen (buffer) -1) == 0
7790     && buffer [ StringLen (buffer) - 1] == '>')
7791   {
7792     MemFree (buffer);
7793     buffer = StringSave (misc_rna->comment);
7794   }
7795 
7796   if (StringNCmp (buffer, "contains ", 9) == 0) {
7797     token_list = TokenListFromMiscRNAString (buffer + 9);
7798     clause_list = GetFeatureClausesFromMiscRNATokens (token_list, misc_rna, bsp, rp);
7799     token_list = ValNodeFreeData (token_list);
7800   } else if (StringNCmp (buffer, "may contain ", 12) == 0) {
7801     token_list = TokenListFromMiscRNAString (buffer + 12);
7802     if (token_list != NULL) {
7803       fcp = NewFeatureClause ( misc_rna, bsp, rp);
7804       fcp->feature_label_data.description = GetRegionDescription (token_list, misc_rna, bsp, rp);
7805       fcp->feature_label_data.typeword = StringSave ("");
7806       fcp->interval = StringSave ("region");
7807       ValNodeAddPointer (&clause_list, DEFLINE_CLAUSEPLUS, fcp);
7808     }
7809     token_list = ValNodeFreeData (token_list);
7810   } else {
7811     token_list = TokenListFromMiscRNAString (buffer);
7812     clause_list = GetFeatureClausesFromMiscRNATokens (token_list, misc_rna, bsp, rp);
7813     token_list = ValNodeFreeData (token_list);
7814   }
7815 
7816   buffer = MemFree (buffer);
7817   return clause_list;
7818 }
7819 
7820 
7821 /* Some misc_feature clauses have a comment that actually lists a tRNA
7822  * and either a control region or D-loop.  This function creates a clause
7823  * for each element in the comment and inserts the list of new clauses into
7824  * the feature list at the point where the single previous clause was.
7825  */
GettRNAAndOtherElements(SeqFeatPtr misc_feat,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7826 static ValNodePtr GettRNAAndOtherElements
7827 ( SeqFeatPtr misc_feat,
7828   BioseqPtr  bsp,
7829   DeflineFeatureRequestListPtr rp)
7830 {
7831   ValNodePtr clause_list = NULL, cf_list = NULL, vnp;
7832   FeatureClausePtr fcp;
7833   Boolean partial5, partial3, is_partial;
7834 
7835   if (misc_feat == NULL ||
7836       misc_feat->idx.subtype != FEATDEF_misc_feature ||
7837       StringHasNoText (misc_feat->comment)) {
7838     return NULL;
7839   }
7840 
7841   cf_list = ParsetRNAAndOtherElement(misc_feat->comment);
7842   if (cf_list == NULL) {
7843     return NULL;
7844   }
7845 
7846   CheckSeqLocForPartial (misc_feat->location, &partial5, &partial3);
7847 
7848   for (vnp = cf_list; vnp != NULL; vnp = vnp->next) {
7849     is_partial = FALSE;
7850     if (vnp == cf_list && partial5) {
7851       is_partial = TRUE;
7852     } else if (vnp->next == NULL && partial3) {
7853       is_partial = TRUE;
7854     }
7855     fcp = FeatureClauseFromParsedComment (vnp->data.ptrvalue, misc_feat, is_partial, bsp, rp);
7856     ValNodeAddPointer (&clause_list, DEFLINE_CLAUSEPLUS, fcp);
7857   }
7858   cf_list = CommentFeatListFree(cf_list);
7859   return clause_list;
7860 }
7861 
7862 
7863 /* Some misc_feat clauses have a comment that lists one or more tRNAs and
7864  * an intergenic spacer.  This function creates a clause for each element
7865  * in the comment and inserts the list of new clauses into the feature list
7866  * at the point where the single previous clause was.
7867  */
ReplaceIntergenicSpacerClauses(ValNodePtr PNTR clause_list,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)7868 static void ReplaceIntergenicSpacerClauses (
7869   ValNodePtr PNTR clause_list,
7870   BioseqPtr       bsp,
7871   DeflineFeatureRequestListPtr rp)
7872 {
7873   FeatureClausePtr fcp;
7874   SeqFeatPtr main_feat;
7875   ValNodePtr clause, replacement_clauses, nextclause, vnp;
7876 
7877   if (clause_list == NULL || *clause_list == NULL) return;
7878   clause = *clause_list;
7879   while (clause != NULL)
7880   {
7881     nextclause = clause->next;
7882     fcp = (clause->data.ptrvalue);
7883     if (fcp == NULL
7884       || fcp->featlist == NULL
7885       || fcp->featlist->choice != DEFLINE_FEATLIST)
7886     {
7887       return;
7888     }
7889     main_feat = (SeqFeatPtr) fcp->featlist->data.ptrvalue;
7890 
7891     if (IsParsableList (main_feat))
7892     {
7893       if ((replacement_clauses = ParsetRNAIntergenicSpacerElements ( main_feat, bsp, rp)) != NULL)
7894       {
7895         for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {}
7896         vnp->next = clause->next;
7897         clause->next = replacement_clauses;
7898         fcp->delete_me = TRUE;
7899       }
7900       else
7901       {
7902         fcp->delete_me = TRUE;
7903       }
7904     } else if ((replacement_clauses = GetMiscRNAelements ( main_feat, bsp, rp )) != NULL ||
7905                (replacement_clauses = GettRNAAndOtherElements ( main_feat, bsp, rp )) != NULL) {
7906       for (vnp = replacement_clauses; vnp->next != NULL; vnp = vnp->next) {}
7907       vnp->next = clause->next;
7908       clause->next = replacement_clauses;
7909       fcp->delete_me = TRUE;
7910     }
7911     clause = nextclause;
7912   }
7913   DeleteFeatureClauses (clause_list);
7914 }
7915 
7916 /* If we are applying a different rule for misc_feats, we need to recalculate
7917  * their descriptions.
7918  */
RenameMiscFeats(ValNodePtr clause_list,Uint1 biomol)7919 static void RenameMiscFeats (ValNodePtr clause_list, Uint1 biomol)
7920 {
7921   ValNodePtr       vnp, featlist;
7922   FeatureClausePtr fcp, featlistclause;
7923   SeqFeatPtr       sfp;
7924   Int4             name_len;
7925 
7926   for (vnp = clause_list; vnp != NULL; vnp = vnp->next)
7927   {
7928     if (vnp->choice != DEFLINE_CLAUSEPLUS || vnp->data.ptrvalue == NULL)
7929     {
7930       continue;
7931     }
7932     fcp = vnp->data.ptrvalue;
7933     for (featlist = fcp->featlist; featlist != NULL; featlist = featlist->next)
7934     {
7935       if ( featlist->data.ptrvalue == NULL)
7936       {
7937         continue;
7938       }
7939       if (featlist->choice == DEFLINE_CLAUSEPLUS)
7940       {
7941         featlistclause = featlist->data.ptrvalue;
7942         RenameMiscFeats (featlistclause->featlist, biomol);
7943         continue;
7944       }
7945       if (featlist->choice != DEFLINE_FEATLIST)
7946       {
7947         continue;
7948       }
7949       sfp = featlist->data.ptrvalue;
7950       if (sfp->idx.subtype != FEATDEF_misc_feature
7951         || sfp->comment == NULL
7952         || IsIntergenicSpacer (sfp)
7953         || IsGeneCluster (sfp)
7954         || IsControlRegion (sfp))
7955       {
7956         continue;
7957       }
7958       if (fcp->feature_label_data.description != NULL)
7959       {
7960         fcp->feature_label_data.description
7961                    = MemFree (fcp->feature_label_data.description);
7962       }
7963       name_len = StringCSpn (sfp->comment, ";");
7964       /* make sure we have space for terminating NULL */
7965       fcp->feature_label_data.description = MemNew ((name_len + 1) * sizeof (Char));
7966       if (fcp->feature_label_data.description == NULL) return;
7967       StringNCpy (fcp->feature_label_data.description, sfp->comment, name_len);
7968       fcp->feature_label_data.description [ name_len ] = 0;
7969       fcp->feature_label_data.typeword =
7970             MemFree (fcp->feature_label_data.typeword);
7971       if (StringCmp (fcp->feature_label_data.description + name_len - 9, " sequence") == 0)
7972       {
7973         fcp->feature_label_data.description[name_len - 9] = 0;
7974         fcp->feature_label_data.typeword = StringSave ("sequence");
7975       }
7976       else if (biomol == MOLECULE_TYPE_GENOMIC)
7977       {
7978         fcp->feature_label_data.typeword = StringSave ("genomic sequence");
7979       }
7980       else if (biomol == MOLECULE_TYPE_MRNA)
7981       {
7982         fcp->feature_label_data.typeword = StringSave ("mRNA sequence");
7983       }
7984       else
7985       {
7986         fcp->feature_label_data.typeword = StringSave ("sequence");
7987       }
7988 
7989       fcp->interval = MemFree (fcp->interval);
7990       fcp->interval = StringSave ("");
7991     }
7992   }
7993 }
7994 
/* Marks for deletion every clause that holds a misc_feature which is none
 * of: a noncoding product feature, a control region, an intergenic
 * spacer, a gene cluster, a parsable list, or a tRNA-plus-other feature.
 * Nested clauses are handled recursively, and DeleteFeatureClauses is then
 * applied to the list.
 *
 * NOTE(review): delete_now is accepted but never consulted here;
 * DeleteFeatureClauses is called unconditionally, including for the
 * recursive calls that pass FALSE.
 */
static void RemoveUnwantedMiscFeats (
  ValNodePtr PNTR clause_list,
  Boolean delete_now
)
{
  ValNodePtr       clause, item;
  FeatureClausePtr fcp, nested;
  SeqFeatPtr       sfp;

  for (clause = *clause_list; clause != NULL; clause = clause->next)
  {
    if (clause->choice == DEFLINE_CLAUSEPLUS && clause->data.ptrvalue != NULL)
    {
      fcp = clause->data.ptrvalue;
      for (item = fcp->featlist; item != NULL; item = item->next)
      {
        if (item->data.ptrvalue == NULL)
        {
          continue;
        }
        if (item->choice == DEFLINE_CLAUSEPLUS)
        {
          nested = item->data.ptrvalue;
          RemoveUnwantedMiscFeats (&(nested->featlist), FALSE);
        }
        else if (item->choice == DEFLINE_FEATLIST)
        {
          sfp = item->data.ptrvalue;
          if (sfp->idx.subtype == FEATDEF_misc_feature
            && ! (IsNoncodingProductFeat (sfp)
                  || IsControlRegion (sfp)
                  || IsIntergenicSpacer (sfp)
                  || IsGeneCluster (sfp)
                  || IsParsableList (sfp)
                  || IsTrnaPlusOther (sfp)))
          {
            fcp->delete_me = TRUE;
          }
        }
      }
    }
  }
  DeleteFeatureClauses (clause_list);
}
8042 
8043 /* When a feature is on the minus strand, the clauses are listed by
8044  * sequence indexing in reverse biological order - we reverse the subclauses
8045  * for the feature in order to have them listed in the definition line
8046  * in biological order.
8047  * This is most noticeable when the main feature is a CDS with multiple
8048  * exons numbered sequentially.  If the exons are on the minus strand and
8049  * appear as 9, 8, 7, 6, we want to display them in the definition line as
8050  * 6, 7, 8, 9.
8051  */
ReverseClauses(ValNodePtr PNTR clause_list,matchFunction itemmatch)8052 static void ReverseClauses (
8053   ValNodePtr PNTR clause_list,
8054   matchFunction   itemmatch
8055 )
8056 {
8057   ValNodePtr vnp, last_feat, first_feat, next_item, new_list;
8058   FeatureClausePtr fcp;
8059 
8060   if (clause_list == NULL || *clause_list == NULL) return;
8061 
8062   last_feat = NULL;
8063   first_feat = NULL;
8064   new_list = NULL;
8065   vnp = *clause_list;
8066   while (vnp != NULL)
8067   {
8068     next_item = vnp->next;
8069     fcp = NULL;
8070     if (vnp->choice == DEFLINE_CLAUSEPLUS
8071       && (fcp = vnp->data.ptrvalue) != NULL
8072       && fcp->slp != NULL
8073       && SeqLocStrand (fcp->slp) == Seq_strand_minus
8074       && fcp->featlist != NULL
8075       && fcp->featlist->choice == DEFLINE_FEATLIST
8076       && itemmatch (fcp->featlist->data.ptrvalue))
8077     {
8078       vnp->next = new_list;
8079       new_list = vnp;
8080     }
8081     else
8082     {
8083       if (first_feat == NULL)
8084       {
8085         first_feat = vnp;
8086         last_feat = vnp;
8087       }
8088       else
8089       {
8090         last_feat->next = vnp;
8091         last_feat = vnp;
8092         last_feat->next = NULL;
8093       }
8094     }
8095     if (fcp != NULL)
8096     {
8097       ReverseClauses (&(fcp->featlist), itemmatch);
8098     }
8099     vnp = next_item;
8100   }
8101   if (first_feat == NULL)
8102   {
8103     *clause_list = new_list;
8104   }
8105   else
8106   {
8107     last_feat->next = new_list;
8108     *clause_list = first_feat;
8109   }
8110 }
8111 
8112 /* This function is used to determine whether two features are both exons
8113  * and whether they are numerically sequential - i.e., exon 7 and exon 8
8114  * are a pair of consecutive exons, exon 7 and exon 9 are not, and exon 7
8115  * and intron 9 are not.
8116  */
ClausePairIsTwoConsecutiveExons(ValNodePtr vnp1,ValNodePtr vnp2,BioseqPtr bsp)8117 static Boolean ClausePairIsTwoConsecutiveExons (
8118   ValNodePtr vnp1,
8119   ValNodePtr vnp2,
8120   BioseqPtr  bsp
8121 )
8122 {
8123   FeatureClausePtr fcp1, fcp2;
8124   SeqFeatPtr       exon1, exon2;
8125   Int4 num1, num2;
8126   CharPtr          exdesc1, exdesc2;
8127 
8128   if (vnp1 == NULL || vnp2 == NULL
8129     || vnp1->choice != DEFLINE_CLAUSEPLUS
8130     || vnp2->choice != DEFLINE_CLAUSEPLUS
8131     || vnp1->data.ptrvalue == NULL
8132     || vnp2->data.ptrvalue == NULL)
8133   {
8134     return FALSE;
8135   }
8136   fcp1 = vnp1->data.ptrvalue;
8137   fcp2 = vnp2->data.ptrvalue;
8138   if ( fcp1->featlist == NULL
8139     || fcp1->featlist->data.ptrvalue == NULL
8140     || fcp2->featlist == NULL
8141     || fcp2->featlist->data.ptrvalue == NULL
8142     || fcp1->featlist->choice != DEFLINE_FEATLIST
8143     || fcp2->featlist->choice != DEFLINE_FEATLIST
8144     || ! IsExon (fcp1->featlist->data.ptrvalue)
8145     || ! IsExon (fcp2->featlist->data.ptrvalue)
8146     || (fcp1->is_alt_spliced && ! fcp2->is_alt_spliced)
8147     || (! fcp1->is_alt_spliced && fcp2->is_alt_spliced))
8148   {
8149     return FALSE;
8150   }
8151 
8152   exon1 = (SeqFeatPtr)(fcp1->featlist->data.ptrvalue);
8153   exon2 = (SeqFeatPtr)(fcp2->featlist->data.ptrvalue);
8154 
8155   exdesc1 = GetExonDescription (bsp, exon1);
8156   exdesc2 = GetExonDescription (bsp, exon2);
8157   if (exdesc1 == NULL || exdesc2 == NULL)
8158   {
8159     if (exdesc1 != NULL) MemFree (exdesc1);
8160     if (exdesc2 != NULL) MemFree (exdesc2);
8161     return FALSE;
8162   }
8163 
8164   num1 = atoi (exdesc1);
8165   num2 = atoi (exdesc2);
8166   MemFree (exdesc1);
8167   MemFree (exdesc2);
8168 
8169   if (abs (num1 - num2) == 1)
8170   {
8171     return TRUE;
8172   }
8173 
8174   return FALSE;
8175 }
8176 
8177 /* This function counts the number of consecutive exons in a list.
8178  */
GetNumberOfConsecutiveExons(ValNodePtr list,BioseqPtr bsp)8179 static Int4 GetNumberOfConsecutiveExons (
8180   ValNodePtr list,
8181   BioseqPtr  bsp
8182 )
8183 {
8184   ValNodePtr check;
8185   Int4       num_exons;
8186 
8187   num_exons = 0;
8188   check = list->next;
8189   if ( ! ClausePairIsTwoConsecutiveExons (list, check, bsp)) return 0;
8190 
8191   num_exons = 2;
8192   while ( check != NULL
8193     && ClausePairIsTwoConsecutiveExons (check, check->next, bsp))
8194   {
8195     num_exons++;
8196     check = check->next;
8197   }
8198   return num_exons;
8199 }
8200 
8201 /* This function replaces a list of three or more consecutive exon clauses
8202  * with a single "summary" clause that gives the range of exons present -
8203  * i.e., if you have exons 1, 2, 3, and 4, a clause will be created that
8204  * contains all four of those features and has a description of "1 through 4".
8205  */
ReplaceExonClauseList(FeatureClausePtr fcp,ValNodePtr clause,Int4 num_exons,BioseqPtr bsp)8206 static void ReplaceExonClauseList (
8207   FeatureClausePtr fcp,
8208   ValNodePtr       clause,
8209   Int4             num_exons,
8210   BioseqPtr        bsp
8211 )
8212 {
8213   ValNodePtr       lastfeat, tmpclause;
8214   FeatureClausePtr tmpfcp;
8215   Int4             i;
8216   CharPtr          new_description;
8217   Int4             new_description_len;
8218   CharPtr          exdesc1 = NULL, exdesc2 = NULL;
8219 
8220   if (fcp == NULL || clause == NULL) return;
8221 
8222   lastfeat = fcp->featlist;
8223   while (lastfeat != NULL && lastfeat->next != NULL)
8224   {
8225     lastfeat = lastfeat->next;
8226   }
8227   tmpclause = clause->next;
8228   for (i=0; i < num_exons - 1 && tmpclause != NULL; i++)
8229   {
8230     tmpfcp = tmpclause->data.ptrvalue;
8231     tmpfcp->delete_me = TRUE;
8232     if (lastfeat == NULL)
8233     {
8234       fcp->featlist = tmpfcp->featlist;
8235     }
8236     else
8237     {
8238       lastfeat->next = tmpfcp->featlist;
8239     }
8240     tmpfcp->featlist = NULL;
8241     while (lastfeat != NULL && lastfeat->next != NULL)
8242     {
8243       lastfeat = lastfeat->next;
8244     }
8245 
8246     tmpclause = tmpclause->next;
8247   }
8248 
8249   if (fcp->featlist != NULL) {
8250     exdesc1 = GetExonDescription (bsp, fcp->featlist->data.ptrvalue);
8251   }
8252   if (lastfeat != NULL) {
8253     exdesc2 = GetExonDescription (bsp, lastfeat->data.ptrvalue);
8254   }
8255   if (exdesc1 == NULL || exdesc2 == NULL)
8256   {
8257     if (exdesc1 != NULL) MemFree (exdesc1);
8258     if (exdesc2 != NULL) MemFree (exdesc2);
8259     return;
8260   }
8261   new_description_len =
8262         StringLen (exdesc1)
8263       + StringLen (exdesc2)
8264       + StringLen (" through ")
8265       + 1;
8266   new_description = MemNew (new_description_len * sizeof (Char));
8267   if (new_description == NULL) return;
8268   sprintf (new_description, "%s through %s", exdesc1, exdesc2);
8269   MemFree (exdesc1);
8270   MemFree (exdesc2);
8271   if (fcp->feature_label_data.description != NULL)
8272   {
8273     MemFree (fcp->feature_label_data.description);
8274   }
8275   fcp->feature_label_data.description = new_description;
8276 }
8277 
8278 /* This function recursively searches for lists of consecutive exons
8279  * and calls ReplaceExonClauseList to consolidate the exons into a list
8280  * clause.
8281  */
RenameExonSequences(ValNodePtr PNTR list,BioseqPtr bsp,Boolean delete_now)8282 static void RenameExonSequences (
8283   ValNodePtr PNTR list,
8284   BioseqPtr       bsp,
8285   Boolean         delete_now
8286 )
8287 {
8288   ValNodePtr       clause;
8289   Int4             num_exons;
8290   FeatureClausePtr fcp;
8291 
8292   if (list == NULL) return;
8293   clause = *list;
8294   while (clause != NULL)
8295   {
8296     if (clause->choice == DEFLINE_CLAUSEPLUS
8297       && clause->data.ptrvalue != NULL)
8298     {
8299       fcp = clause->data.ptrvalue;
8300       if ( ! fcp->delete_me)
8301       {
8302         num_exons = GetNumberOfConsecutiveExons (clause, bsp);
8303         if (num_exons > 2)
8304         {
8305           ReplaceExonClauseList (fcp, clause, num_exons, bsp);
8306         }
8307         else
8308         {
8309           RenameExonSequences (&fcp->featlist, bsp, FALSE);
8310         }
8311       }
8312     }
8313     clause = clause->next;
8314   }
8315   if (delete_now) DeleteFeatureClauses (list);
8316 }
8317 
8318 static CharPtr organelleByGenome [] = {
8319   NULL,
8320   NULL,
8321   "chloroplast",
8322   "chromoplast",
8323   "kinetoplast",
8324   "mitochondrial",
8325   "plastid",
8326   "",
8327   "",
8328   "",
8329   "",
8330   "",
8331   "cyanelle",
8332   "",
8333   "",
8334   "",
8335   "apicoplast",
8336   "leucoplast",
8337   "proplastid",
8338   "",
8339   "hydrogenosome",
8340   "",
8341   "chromatophore",
8342   NULL,
8343 };
8344 
8345 static CharPtr organelleByPopup [] = {
8346   NULL,
8347   "mitochondrial",
8348   "chloroplast",
8349   "kinetoplast",
8350   "plastid",
8351   "chromoplast",
8352   "cyanelle",
8353   "apicoplast",
8354   "leucoplast",
8355   "proplastid",
8356   NULL
8357 };
8358 
8359 static void
AddProductEnding(CharPtr str,BioseqPtr bsp,Int2 mitochloroflag,ValNodePtr strings)8360 AddProductEnding
8361 (CharPtr    str,
8362  BioseqPtr  bsp,
8363  Int2       mitochloroflag,
8364  ValNodePtr strings)
8365 {
8366   Char orgnelle [80];
8367   BioSourcePtr  biop;
8368   ValNodePtr last_string;
8369   Int4 num_genes;
8370   SubSourcePtr  ssp;
8371 
8372   num_genes = 0;
8373   biop = GetBiopForBsp (bsp);
8374 
8375   if (biop != NULL) {
8376     if (FindStringInStrings (strings, "genes"))
8377     {
8378       num_genes = 2;
8379     }
8380     else if ((last_string = FindStringInStrings (strings, "gene")) != NULL
8381       && last_string->next != NULL
8382       && (last_string = FindStringInStrings (last_string->next, "gene")) != NULL)
8383     {
8384       num_genes = 2;
8385     }
8386     else
8387     {
8388       num_genes = 1;
8389     }
8390 
8391     orgnelle [0] = '\0';
8392 
8393     switch (biop->genome) {
8394     case GENOME_macronuclear :
8395       StringCat (str, "; macronuclear");
8396       break;
8397     case GENOME_nucleomorph :
8398       StringCat (str, "; nucleomorph");
8399       break;
8400     case GENOME_apicoplast :
8401     case GENOME_chloroplast :
8402     case GENOME_chromoplast :
8403     case GENOME_kinetoplast :
8404     case GENOME_mitochondrion :
8405     case GENOME_plastid :
8406     case GENOME_cyanelle :
8407     case GENOME_leucoplast :
8408     case GENOME_proplastid :
8409     case GENOME_hydrogenosome :
8410     case GENOME_chromatophore :
8411       sprintf (orgnelle, "; %s", organelleByGenome [biop->genome]);
8412       StringCat (str, orgnelle);
8413       break;
8414     default :
8415       ssp = biop->subtype;
8416       while (ssp != NULL && ssp->subtype != 255)
8417       {
8418         ssp = ssp->next;
8419       }
8420       if (ssp != NULL
8421         && ssp->name != NULL
8422         && StringStr (ssp->name, "micronuclear"))
8423       {
8424         StringCat (str, "; micronuclear");
8425       }
8426       else if (mitochloroflag > 0) {
8427         if (mitochloroflag > DEFAULT_ORGANELLE_CLAUSE && mitochloroflag - DEFAULT_ORGANELLE_CLAUSE < DEFAULT_ORGANELLE_CLAUSE) {
8428             sprintf(orgnelle, "; nuclear copy of %s gene", organelleByPopup[mitochloroflag - DEFAULT_ORGANELLE_CLAUSE]);
8429             StringCat(str, orgnelle);
8430         } else if (mitochloroflag > 9) {
8431           /* beyond list */
8432         }
8433         else {
8434           if (num_genes > 1)
8435           {
8436             sprintf (orgnelle, "; nuclear genes for %s products",
8437                      organelleByPopup [mitochloroflag]);
8438           }
8439           else
8440           {
8441             sprintf (orgnelle, "; nuclear gene for %s product",
8442                      organelleByPopup [mitochloroflag]);
8443           }
8444           StringCat (str, orgnelle);
8445         }
8446       }
8447       break;
8448     }
8449   }
8450 }
8451 
8452 /*---------------------------------------------------------------------*/
8453 /*                                                                     */
8454 /* AutoDef_AddEnding () -- Add an ending on to the definition line     */
8455 /*                         after the last feature.                     */
8456 /*                                                                     */
8457 /*---------------------------------------------------------------------*/
8458 
AutoDef_AddEnding(ValNodePtr clause_list,ValNodePtr PNTR strings,BioseqPtr bsp,Int2 mitochloroflag,Boolean alternate_splice_flag)8459 static void AutoDef_AddEnding (
8460   ValNodePtr   clause_list,
8461   ValNodePtr PNTR strings,
8462   BioseqPtr    bsp,
8463   Int2         mitochloroflag,
8464   Boolean      alternate_splice_flag
8465 )
8466 {
8467   Char str [200];
8468   ValNodePtr last_string;
8469   Int4 new_data_len;
8470   CharPtr new_data;
8471 
8472   str[0] = 0;
8473   AddProductEnding (str, bsp, mitochloroflag, *strings);
8474   if (alternate_splice_flag) {
8475     StringCat (str, ", alternatively spliced");
8476   }
8477 
8478   StringCat (str, ".");
8479 
8480   last_string = *strings;
8481   if (last_string == NULL)
8482   {
8483     ValNodeAddStr (strings, 0, StringSave ( str));
8484   }
8485   else
8486   {
8487     while (last_string->next != NULL) last_string = last_string->next;
8488     new_data_len = StringLen (last_string->data.ptrvalue) + StringLen (str) + 1;
8489     new_data = (CharPtr) MemNew (new_data_len);
8490     if (new_data == NULL) return;
8491     StringCpy (new_data, last_string->data.ptrvalue);
8492     StringCat (new_data, str);
8493     MemFree (last_string->data.ptrvalue);
8494     last_string->data.ptrvalue = new_data;
8495   }
8496 }
8497 
LastIntervalChangeBeforeEnd(FeatureClausePtr onebefore,FeatureClausePtr thisclause,ValNodePtr rest_of_list)8498 static Boolean LastIntervalChangeBeforeEnd (
8499   FeatureClausePtr onebefore,
8500   FeatureClausePtr thisclause,
8501   ValNodePtr rest_of_list
8502 )
8503 {
8504   ValNodePtr       vnp;
8505   FeatureClausePtr fcp;
8506 
8507   if (onebefore == NULL || rest_of_list == NULL) return FALSE;
8508 
8509   if (StringCmp (onebefore->interval, thisclause->interval) == 0) return FALSE;
8510 
8511   for (vnp = rest_of_list; vnp != NULL; vnp = vnp->next)
8512   {
8513     if (vnp->choice == DEFLINE_CLAUSEPLUS && vnp->data.ptrvalue != NULL)
8514     {
8515       fcp = vnp->data.ptrvalue;
8516       if (StringCmp (thisclause->interval, fcp->interval) != 0) return FALSE;
8517     }
8518   }
8519   return TRUE;
8520 
8521 }
8522 
/* Rewrites the first "gene, " in the clause's interval text as "genes, ".
 * The interval is replaced by a newly allocated string; it is left
 * untouched when it is NULL, does not contain "gene, ", or the new string
 * cannot be allocated.
 */
static void PluralizeClauseIntervals (
  FeatureClausePtr fcp
)
{
  CharPtr hit, plural, insert_at;

  if (fcp->interval == NULL) return;

  hit = StringStr (fcp->interval, "gene, ");
  if (hit == NULL) return;

  /* one extra byte for the 's', one for the terminator */
  plural = MemNew (StringLen (fcp->interval) + 2);
  if (plural == NULL) return;

  StringCpy (plural, fcp->interval);
  /* position just past "gene" in the copy */
  insert_at = plural + (hit - fcp->interval) + 4;
  *insert_at = 's';
  /* re-copy the remainder (", ...") one position further along */
  StringCpy (insert_at + 1, hit + 4);

  MemFree (fcp->interval);
  fcp->interval = plural;
}
8542 
DisplayAlleleName(FeatureClausePtr thisclause)8543 static Boolean DisplayAlleleName (FeatureClausePtr thisclause)
8544 {
8545   if (thisclause == NULL) return FALSE;
8546   if (StringCmp (thisclause->feature_label_data.typeword, "gene") == 0
8547     || StringCmp (thisclause->feature_label_data.typeword, "pseudogene") == 0
8548     || StringCmp (thisclause->feature_label_data.typeword, "mRNA") == 0
8549     || StringCmp (thisclause->feature_label_data.typeword, "pseudogene mRNA") == 0
8550     || StringCmp (thisclause->feature_label_data.typeword, "precursor RNA") == 0
8551     || StringCmp (thisclause->feature_label_data.typeword, "pseudogene precursor RNA") == 0)
8552   {
8553     return TRUE;
8554   }
8555   return FALSE;
8556 }
8557 
/* ListClauses
 * Walks the DEFLINE_CLAUSEPLUS entries of clauselist (other entries are
 * skipped) and appends text for each clause to the *strings list.
 *
 * For each clause a string is built from, in order: an optional "and ",
 * the typeword (when it comes first), the description, an optional comma,
 * the typeword (when it comes last) plus an optional ", <allele> allele",
 * and an optional trailing comma.  A non-empty string is added to *strings.
 * When there is no following clause, or the following clause has a different
 * interval, a copy of this clause's interval is added as well.
 *
 * Parameters:
 *   clauselist         - list whose DEFLINE_CLAUSEPLUS nodes hold clauses
 *   strings            - list that receives the generated strings
 *   allow_semicolons   - when an interval needs a terminator, append ";"
 *                        if TRUE and "," if FALSE
 *   suppress_final_and - if TRUE, no "and " before the last clause and a
 *                        comma is printed after the second-to-last clause
 *   suppress_allele    - if TRUE, allele names are neither printed nor
 *                        considered when comparing neighbouring clauses
 *
 * Side effects: clauses may be modified in place (interval text may gain a
 * terminator or be pluralized, descriptions may be pluralized).
 * The function returns silently if an allocation fails.
 */
static void ListClauses (
  ValNodePtr clauselist,
  ValNodePtr PNTR strings,
  Boolean    allow_semicolons,
  Boolean    suppress_final_and,
  Boolean    suppress_allele
)
{
  FeatureClausePtr thisclause, onebefore, oneafter, twoafter;
  Boolean print_typeword;
  Boolean print_and;
  Boolean print_comma;
  Boolean print_semicolon;
  Boolean print_comma_between_description_and_typeword;
  Boolean typeword_is_plural;
  size_t clause_len;
  size_t desc_len;
  CharPtr clause_string;
  Boolean oneafter_has_detail_change;
  Boolean oneafter_has_interval_change;
  Boolean oneafter_has_typeword_change;
  Boolean onebefore_has_detail_change;
  Boolean onebefore_has_interval_change;
  Boolean onebefore_has_typeword_change;
  SeqFeatPtr main_feat;
  CharPtr new_interval;
  ValNodePtr voneafter, vtwoafter;

  /* advance to the first clause entry */
  while (clauselist != NULL && clauselist->choice != DEFLINE_CLAUSEPLUS)
  {
    clauselist = clauselist->next;
  }
  if (clauselist == NULL) return;

  thisclause = clauselist->data.ptrvalue;
  onebefore = NULL;

  while (thisclause != NULL)
  {
    oneafter_has_detail_change = FALSE;
    oneafter_has_interval_change = FALSE;
    oneafter_has_typeword_change = FALSE;
    onebefore_has_detail_change = FALSE;
    onebefore_has_interval_change = FALSE;
    onebefore_has_typeword_change = FALSE;

    /* compare this clause with the previous one */
    if (onebefore != NULL)
    {
      if (StringCmp (onebefore->interval, thisclause->interval) != 0)
        onebefore_has_interval_change = TRUE;
      if (StringCmp (onebefore->feature_label_data.typeword,
                     thisclause->feature_label_data.typeword) != 0)
      {
        onebefore_has_typeword_change = TRUE;
      }
      if (onebefore_has_typeword_change || onebefore_has_interval_change
          || (!suppress_allele && DisplayAlleleName (onebefore) && StringLen (onebefore->allelename) != 0)
          || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0))
      {
        onebefore_has_detail_change = TRUE;
      }
    }

    /* locate the next two clause entries, skipping non-clause nodes */
    voneafter = clauselist->next;
    while (voneafter != NULL && voneafter->choice != DEFLINE_CLAUSEPLUS)
    {
      voneafter = voneafter->next;
    }
    if (voneafter == NULL)
    {
      vtwoafter = NULL;
    }
    else
    {
      vtwoafter = voneafter->next;
      while (vtwoafter != NULL && vtwoafter->choice != DEFLINE_CLAUSEPLUS)
      {
        vtwoafter = vtwoafter->next;
      }
    }

    /* compare this clause with the next one */
    if (voneafter != NULL)
    {
      oneafter = voneafter->data.ptrvalue;
      if (StringCmp (oneafter->interval, thisclause->interval) != 0)
        oneafter_has_interval_change = TRUE;
      if (StringCmp (oneafter->feature_label_data.typeword,
                     thisclause->feature_label_data.typeword) != 0)
      {
        oneafter_has_typeword_change = TRUE;
      }
      if (oneafter_has_typeword_change  || oneafter_has_interval_change
          || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) != 0)
          || (!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) != 0))
      {
        oneafter_has_detail_change = TRUE;
      }
      if (vtwoafter != NULL)
      {
        twoafter = vtwoafter->data.ptrvalue;
      }
      else
      {
        twoafter = NULL;
      }
    }
    else
    {
      oneafter = NULL;
      twoafter = NULL;
    }
    print_typeword = FALSE;
    typeword_is_plural = FALSE;
    print_and = FALSE;
    print_comma = FALSE;
    print_semicolon = FALSE;

    /* when to print the typeword, and whether it is plural */
    if (thisclause->feature_label_data.is_typeword_first)
    {
      if (onebefore == NULL || onebefore_has_detail_change)
      {
        print_typeword = TRUE;
        if (oneafter != NULL && ! oneafter_has_detail_change)
        {
          typeword_is_plural = TRUE;
        }
        else if (StringStr (thisclause->feature_label_data.description, " through ") != NULL
          && StringCmp (thisclause->feature_label_data.typeword, "exon") == 0)
        {
          typeword_is_plural = TRUE;
        }
      }
    }
    else
    {
      if (oneafter == NULL || oneafter_has_detail_change)
      {
        print_typeword = TRUE;
        if (onebefore != NULL && ! onebefore_has_detail_change)
        {
          typeword_is_plural = TRUE;
        }
      }
    }

    /* when to print and before this section */
    if ( onebefore != NULL
         && ! onebefore_has_detail_change
         && (oneafter == NULL || oneafter_has_detail_change))
    {
      print_and = TRUE;
    }
    else if (oneafter == NULL && onebefore != NULL)
    {
      print_and = TRUE;
    }
    else if (onebefore != NULL
         && ! onebefore_has_interval_change
         && oneafter_has_interval_change)
    {
      print_and = TRUE;
    }
    else if ( LastIntervalChangeBeforeEnd ( onebefore,
                                            thisclause,
                                            clauselist->next))
    {
      print_and = TRUE;
    }

    if (suppress_final_and && oneafter == NULL)
    {
      print_and = FALSE;
    }
    if (suppress_final_and && oneafter != NULL && twoafter == NULL)
    {
      print_comma = TRUE;
    }

    /* when to print semicolon after this section */
    /* after every interval change except when exons change "interval" */
    /* exons changing interval are going from alt-spliced to not */
    /* or vice versa, in either case we don't want a semicolon or comma */
    if (oneafter != NULL && oneafter_has_interval_change
      && (StringCmp (thisclause->feature_label_data.typeword, "exon") != 0
         || StringCmp (oneafter->feature_label_data.typeword, "exon") != 0))
    {
      print_semicolon = TRUE;
    }

    /* when to print comma after this section */
    if (onebefore != NULL && oneafter != NULL
      && ! onebefore_has_detail_change
      && ! oneafter_has_detail_change )
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && onebefore != NULL
      && ! onebefore_has_interval_change && ! oneafter_has_interval_change
      &&  onebefore_has_typeword_change &&  oneafter_has_typeword_change)
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && twoafter != NULL
      && ! oneafter_has_detail_change
      && StringCmp (twoafter->feature_label_data.typeword,
                    thisclause->feature_label_data.typeword) == 0
      && StringCmp (twoafter->interval,
                    thisclause->interval) == 0)
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL  && twoafter != NULL
      && oneafter_has_typeword_change
      && StringCmp (twoafter->feature_label_data.typeword,
                    oneafter->feature_label_data.typeword) == 0
      && StringCmp (twoafter->interval,
                    oneafter->interval) == 0
      && ! print_and)
    {
      print_comma = TRUE;
    }
    else if (((oneafter_has_interval_change || oneafter == NULL)
      && StringDoesHaveText (thisclause->interval))
      || (oneafter_has_interval_change && oneafter != NULL && ! print_semicolon))
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && twoafter != NULL
      && !oneafter_has_interval_change
      && StringCmp (thisclause->interval, twoafter->interval) == 0
      && oneafter_has_typeword_change
      && StringCmp (thisclause->feature_label_data.typeword,
                    twoafter->feature_label_data.typeword) != 0)
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && onebefore != NULL && twoafter != NULL
      && ! oneafter_has_interval_change && ! onebefore_has_interval_change
      && StringCmp (thisclause->interval, twoafter->interval) == 0
      && oneafter_has_typeword_change)
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && twoafter != NULL
      && oneafter_has_typeword_change
      && StringCmp (oneafter->feature_label_data.typeword,
                    twoafter->feature_label_data.typeword) != 0
      && ! oneafter_has_interval_change
      && StringCmp (oneafter->interval, twoafter->interval) == 0)
    {
      /* spacer 1, foo RNA gene, and spacer2, complete sequence */
      /*         ^ */
      print_comma = TRUE;
    }
    else if (oneafter != NULL && twoafter != NULL
      && ! oneafter_has_interval_change && StringCmp (thisclause->interval, twoafter->interval) == 0
      && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
        || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
    {
      print_comma = TRUE;
    }
    else if (oneafter != NULL && onebefore != NULL
      && ! oneafter_has_interval_change && ! onebefore_has_interval_change
      && ((!suppress_allele && DisplayAlleleName (oneafter) && StringLen (oneafter->allelename) > 0)
        || (!suppress_allele && DisplayAlleleName (thisclause) && StringLen (thisclause->allelename) > 0)))
    {
      print_comma = TRUE;
    }

    /* no comma for mobile elements / endogenous virus features whose
     * interval text does not start with "partial" or "complete"
     */
    if (thisclause->featlist != NULL
      && thisclause->featlist->data.ptrvalue != NULL
      && StringDoesHaveText (thisclause->interval)
      && StringNCmp (thisclause->interval, "partial", 7) != 0
      && StringNCmp (thisclause->interval, "complete", 8) != 0)
    {
      main_feat = thisclause->featlist->data.ptrvalue;
      if (IsMobileElement (main_feat)
        || IsEndogenousVirusSourceFeature (main_feat) )
      {
        print_comma = FALSE;
      }
    }

    /* this clause ends a run of clauses sharing one interval */
    if (onebefore != NULL
      && ! onebefore_has_interval_change
      && (oneafter_has_interval_change || oneafter == NULL))
    {
      PluralizeClauseIntervals (thisclause);
    }

    if ( thisclause->make_plural )
    {
      if ((onebefore != NULL && ! onebefore_has_detail_change)
        || (oneafter != NULL && !oneafter_has_detail_change))
      {
        PluralizeConsolidatedClauseDescription (thisclause);
      }
      else
      {
        typeword_is_plural = TRUE;
      }
    }

    /* measured after any pluralization above, since that may change it */
    desc_len = StringLen (thisclause->feature_label_data.description);
    clause_len = desc_len + 1;

    /* add one in case we need to add the semicolon to this clause (when
     * the interval has changed because this clause has no interval and
     * the next one does).
     */
    clause_len++;

    /* we need to place a comma between the description and the type word
     * when the description ends with "precursor" or when the type word
     * starts with "precursor"
     *
     * Both description accesses are bounds-checked with desc_len: the last
     * character is only inspected for a non-empty description, and the
     * suffix comparison only runs when the description is at least as long
     * as "precursor" (otherwise the computed pointer would lie before the
     * start of the description).
     */
    if ( thisclause->feature_label_data.description != NULL
      && ! thisclause->feature_label_data.is_typeword_first
      && print_typeword
      && ! StringHasNoText (thisclause->feature_label_data.typeword)
      && ((StringNCmp (thisclause->feature_label_data.typeword, "precursor", 9) == 0
            && desc_len > 0
            && thisclause->feature_label_data.description [desc_len - 1] != ')')
          || (desc_len >= StringLen ("precursor")
              && StringCmp ( thisclause->feature_label_data.description
                     + desc_len - StringLen ("precursor"),
                     "precursor") == 0)))
    {
      print_comma_between_description_and_typeword = TRUE;
      clause_len += 1;
    }
    else
    {
      print_comma_between_description_and_typeword = FALSE;
    }

    /* reserve room for every optional piece that may be appended below */
    if (print_typeword)
      clause_len += StringLen (thisclause->feature_label_data.typeword) + 1;
    if (typeword_is_plural)
      clause_len += 1;
    if (print_and)
      clause_len += 4;
    if (print_comma)
      clause_len += 2;
    if (!suppress_allele && DisplayAlleleName (thisclause))
    {
      clause_len += StringLen (thisclause->allelename) + 10;
      if (StringLen (thisclause->allelename) > 0)
      {
        clause_len += StringLen (thisclause->allelename) + StringLen ("allele ");
      }
    }

    clause_string = (CharPtr) MemNew (clause_len);
    if (clause_string == NULL)
      return;
    clause_string[0] = 0;
    if (print_and)
      StringCat (clause_string, "and ");
    if (thisclause->feature_label_data.is_typeword_first && print_typeword
      && thisclause->feature_label_data.typeword != NULL
      && ! StringHasNoText (thisclause->feature_label_data.typeword))
    {
      StringCat (clause_string, thisclause->feature_label_data.typeword);
      if (typeword_is_plural)
        StringCat (clause_string, "s");
      if (thisclause->feature_label_data.description != NULL)
        StringCat (clause_string, " ");
    }
    if (thisclause->feature_label_data.description != NULL)
    {
      StringCat (clause_string, thisclause->feature_label_data.description);
      if (print_comma_between_description_and_typeword)
      {
        StringCat (clause_string, ",");
      }
    }
    if (! thisclause->feature_label_data.is_typeword_first && print_typeword
      && thisclause->feature_label_data.typeword != NULL
      && ! StringHasNoText (thisclause->feature_label_data.typeword))
    {
      if (!StringHasNoText (thisclause->feature_label_data.description))
        StringCat (clause_string, " ");
      StringCat (clause_string, thisclause->feature_label_data.typeword);
      if (typeword_is_plural)
        StringCat (clause_string, "s");
      if (!suppress_allele && DisplayAlleleName (thisclause)
        && thisclause->allelename != NULL)
      {
        StringCat (clause_string, ", ");
        StringCat (clause_string, thisclause->allelename);
        StringCat (clause_string, " allele");
      }
    }
    if (StringLen (clause_string) > 0 )
    {
      if (print_comma)
        StringCat (clause_string, ",");
      /* the list node now refers to clause_string; the pointer is kept so
       * a semicolon can still be appended below
       */
      ValNodeAddStr (strings, 0, clause_string);
    }
    else
    {
        MemFree (clause_string);
        clause_string = NULL;
    }

    if (oneafter == NULL || oneafter_has_interval_change)
    {
      if (print_semicolon) {
        if (thisclause->interval == NULL
          || StringHasNoText(thisclause->interval)) {
          /* no interval text: the semicolon goes on the clause itself */
          if (clause_string != NULL) {
            StringCat (clause_string, ";");
          }
        } else if (thisclause->interval[StringLen (thisclause->interval) - 1] != ';') {
          /* The interval has text here, so its length is at least 1 and
           * the index above addresses its last character; a terminator is
           * only appended when the interval does not already end in ';'.
           */
          new_interval = MemNew (StringLen (thisclause->interval) + 2);
          if (new_interval == NULL) return;
          StringCpy (new_interval, thisclause->interval);
          if (allow_semicolons)
          {
            StringCat (new_interval, ";");
          }
          else
          {
            StringCat (new_interval, ",");
          }
          MemFree (thisclause->interval);
          thisclause->interval = new_interval;
        }
      }
      if (thisclause->interval != NULL
        && !StringHasNoText (thisclause->interval))
      {
        ValNodeAddStr (strings, 0, StringSave (thisclause->interval));
      }
    }

    /* slide the window forward by one clause */
    onebefore = thisclause;
    thisclause = oneafter;
    clauselist = voneafter;
  }
}
8996 
GetMoleculeType(BioseqPtr bsp,Uint2 entityID)8997 static Uint1 GetMoleculeType
8998 (BioseqPtr bsp,
8999  Uint2     entityID)
9000 {
9001   SeqDescPtr         sdp;
9002   MolInfoPtr         mip;
9003   SeqMgrDescContext  dcontext;
9004 
9005   if (bsp == NULL) return MOLECULE_TYPE_GENOMIC;
9006   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
9007   if (sdp == NULL) return MOLECULE_TYPE_GENOMIC;
9008   mip = (MolInfoPtr) sdp->data.ptrvalue;
9009   if (mip == NULL) return MOLECULE_TYPE_GENOMIC;
9010   return mip->biomol;
9011 }
9012 
SpecialHandlingForSpecialTechniques(BioseqPtr bsp)9013 static Boolean SpecialHandlingForSpecialTechniques (
9014   BioseqPtr bsp
9015 )
9016 {
9017   SeqDescPtr sdp;
9018   MolInfoPtr mip;
9019 
9020   if (bsp == NULL) return MOLECULE_TYPE_GENOMIC;
9021   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, NULL);
9022   if (sdp == NULL)
9023   {
9024     for (sdp = bsp->descr;
9025          sdp != NULL && sdp->choice != Seq_descr_molinfo;
9026          sdp = sdp->next)
9027     {}
9028   }
9029   if (sdp == NULL) return FALSE;
9030   mip = (MolInfoPtr) sdp->data.ptrvalue;
9031   if (mip == NULL) return FALSE;
9032   if (mip->tech == MI_TECH_htgs_0 ||
9033       mip->tech == MI_TECH_htgs_1 ||
9034       mip->tech == MI_TECH_htgs_2 ||
9035       mip->tech == MI_TECH_est ||
9036       mip->tech == MI_TECH_sts ||
9037       mip->tech == MI_TECH_survey ||
9038       mip->tech == MI_TECH_wgs) {
9039     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, NULL);
9040     if (sdp != NULL) {
9041       return TRUE;
9042     }
9043   }
9044 
9045   return FALSE;
9046 }
9047 
ShouldRemoveExon(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9048 static Boolean LIBCALLBACK ShouldRemoveExon (
9049   SeqFeatPtr sfp,
9050   FeatureClausePtr parent_fcp,
9051   FeatureClausePtr this_fcp,
9052   BioseqPtr bsp,
9053   Boolean isLonely,
9054   Boolean isRequested,
9055   Boolean isSegment,
9056   DeflineFeatureRequestListPtr rp
9057 )
9058 {
9059   Boolean partial3, partial5;
9060   SeqFeatPtr main_feat;
9061 
9062   if (isSegment || isLonely || isRequested) return FALSE;
9063   if (parent_fcp == NULL
9064     || parent_fcp->featlist == NULL
9065     || parent_fcp->featlist->data.ptrvalue == NULL)
9066   {
9067     return TRUE;
9068   }
9069 
9070   main_feat = parent_fcp->featlist->data.ptrvalue;
9071   if ( IsCDS (main_feat))
9072   {
9073     CheckSeqLocForPartial (main_feat->location, &partial5, &partial3);
9074     if (partial5 || partial3) return FALSE;
9075   }
9076   else if (IsmRNA (main_feat) || parent_fcp->has_mrna)
9077   {
9078     return FALSE;
9079   }
9080   return TRUE;
9081 }
9082 
ShouldRemoveCDS(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9083 static Boolean LIBCALLBACK ShouldRemoveCDS (
9084   SeqFeatPtr sfp,
9085   FeatureClausePtr parent_fcp,
9086   FeatureClausePtr this_fcp,
9087   BioseqPtr bsp,
9088   Boolean isLonely,
9089   Boolean isRequested,
9090   Boolean isSegment,
9091   DeflineFeatureRequestListPtr rp)
9092 {
9093   CharPtr description;
9094   Boolean retval = FALSE;
9095 
9096   description = GetGeneProtDescription (this_fcp, bsp, rp);
9097   if (StringHasNoText (description))
9098   {
9099     retval = TRUE;
9100   }
9101   if (description != NULL) MemFree (description);
9102   return retval;
9103 }
9104 
ShouldRemoveNoncodingProductFeat(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9105 static Boolean LIBCALLBACK ShouldRemoveNoncodingProductFeat (
9106   SeqFeatPtr sfp,
9107   FeatureClausePtr parent_fcp,
9108   FeatureClausePtr this_fcp,
9109   BioseqPtr bsp, Boolean isLonely,
9110   Boolean isRequested,
9111   Boolean isSegment,
9112   DeflineFeatureRequestListPtr rp
9113 )
9114 {
9115   if (isRequested) return FALSE;
9116   return TRUE;
9117 }
9118 
ShouldRemovePromoter(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9119 static Boolean LIBCALLBACK ShouldRemovePromoter (
9120   SeqFeatPtr sfp,
9121   FeatureClausePtr parent_fcp,
9122   FeatureClausePtr this_fcp,
9123   BioseqPtr bsp, Boolean isLonely,
9124   Boolean isRequested,
9125   Boolean isSegment,
9126   DeflineFeatureRequestListPtr rp
9127 )
9128 {
9129   /* remove a promoter if it is in an mRNA or gene clause */
9130   if (isRequested)
9131   {
9132     return FALSE;
9133   }
9134   else if (parent_fcp != NULL
9135       && (parent_fcp->has_mrna
9136         || (parent_fcp->featlist != NULL
9137            && parent_fcp->featlist->choice == DEFLINE_FEATLIST
9138            && parent_fcp->featlist->data.ptrvalue != NULL
9139            && IsmRNA (parent_fcp->featlist->data.ptrvalue))))
9140   {
9141     return TRUE;
9142   }
9143   else if (isLonely)
9144   {
9145     return FALSE;
9146   }
9147   else
9148   {
9149     return TRUE;
9150   }
9151 }
9152 
ShouldRemoveLTR(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9153 static Boolean LIBCALLBACK ShouldRemoveLTR (
9154   SeqFeatPtr sfp,
9155   FeatureClausePtr parent_fcp,
9156   FeatureClausePtr this_fcp,
9157   BioseqPtr bsp,
9158   Boolean isLonely,
9159   Boolean isRequested,
9160   Boolean isSegment,
9161   DeflineFeatureRequestListPtr rp
9162 )
9163 {
9164   if (isRequested)
9165   {
9166     return FALSE;
9167   }
9168   else if (parent_fcp != NULL)
9169   {
9170     return TRUE;
9171   }
9172   else if (isLonely)
9173     return FALSE;
9174   else
9175     return TRUE;
9176 }
9177 
9178 
ShouldRemoveRepeatRegion(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9179 static Boolean LIBCALLBACK ShouldRemoveRepeatRegion (
9180   SeqFeatPtr sfp,
9181   FeatureClausePtr parent_fcp,
9182   FeatureClausePtr this_fcp,
9183   BioseqPtr bsp,
9184   Boolean isLonely,
9185   Boolean isRequested,
9186   Boolean isSegment,
9187   DeflineFeatureRequestListPtr rp
9188 )
9189 {
9190   if (isRequested)
9191   {
9192     return FALSE;
9193   }
9194   else
9195   {
9196     return TRUE;
9197   }
9198 }
9199 
9200 
ShouldRemove3UTR(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9201 static Boolean LIBCALLBACK ShouldRemove3UTR (
9202   SeqFeatPtr sfp,
9203   FeatureClausePtr parent_fcp,
9204   FeatureClausePtr this_fcp,
9205   BioseqPtr bsp,
9206   Boolean isLonely,
9207   Boolean isRequested,
9208   Boolean isSegment,
9209   DeflineFeatureRequestListPtr rp
9210 )
9211 {
9212   if (isLonely || isRequested)
9213     return FALSE;
9214   else
9215     return TRUE;
9216 }
9217 
ShouldRemove5UTR(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9218 static Boolean LIBCALLBACK ShouldRemove5UTR (
9219   SeqFeatPtr sfp,
9220   FeatureClausePtr parent_fcp,
9221   FeatureClausePtr this_fcp,
9222   BioseqPtr bsp,
9223   Boolean isLonely,
9224   Boolean isRequested,
9225   Boolean isSegment,
9226   DeflineFeatureRequestListPtr rp
9227 )
9228 {
9229   if (isLonely || isRequested)
9230     return FALSE;
9231   else
9232     return TRUE;
9233 }
9234 
9235 
ShouldRemoveuORF(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9236 static Boolean LIBCALLBACK ShouldRemoveuORF (
9237   SeqFeatPtr sfp,
9238   FeatureClausePtr parent_fcp,
9239   FeatureClausePtr this_fcp,
9240   BioseqPtr bsp,
9241   Boolean isLonely,
9242   Boolean isRequested,
9243   Boolean isSegment,
9244   DeflineFeatureRequestListPtr rp
9245 )
9246 {
9247   if (isLonely || isRequested)
9248     return FALSE;
9249   else
9250     return TRUE;
9251 }
9252 
9253 
ShouldRemoveIntron(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9254 static Boolean LIBCALLBACK ShouldRemoveIntron (
9255   SeqFeatPtr sfp,
9256   FeatureClausePtr parent_fcp,
9257   FeatureClausePtr this_fcp,
9258   BioseqPtr bsp, Boolean isLonely,
9259   Boolean isRequested,
9260   Boolean isSegment,
9261   DeflineFeatureRequestListPtr rp
9262 )
9263 {
9264   if (isRequested)
9265   {
9266     return FALSE;
9267   }
9268   else if (parent_fcp != NULL
9269       && (parent_fcp->has_mrna
9270         || (parent_fcp->featlist != NULL
9271            && parent_fcp->featlist->choice == DEFLINE_FEATLIST
9272            && parent_fcp->featlist->data.ptrvalue != NULL
9273            && IsmRNA (parent_fcp->featlist->data.ptrvalue))))
9274   {
9275     return TRUE;
9276   }
9277   else if (isLonely)
9278   {
9279     return FALSE;
9280   }
9281   else
9282   {
9283     return TRUE;
9284   }
9285 }
9286 
ShouldRemoveMobileElement(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9287 static Boolean LIBCALLBACK ShouldRemoveMobileElement
9288 ( SeqFeatPtr sfp,
9289   FeatureClausePtr parent_fcp,
9290   FeatureClausePtr this_fcp,
9291   BioseqPtr bsp,
9292   Boolean isLonely,
9293   Boolean isRequested,
9294   Boolean isSegment,
9295   DeflineFeatureRequestListPtr rp)
9296 {
9297   return (!isLonely && !isRequested);
9298 }
9299 
ShouldRemovencRNA(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9300 static Boolean LIBCALLBACK ShouldRemovencRNA
9301 ( SeqFeatPtr sfp,
9302   FeatureClausePtr parent_fcp,
9303   FeatureClausePtr this_fcp,
9304   BioseqPtr bsp,
9305   Boolean isLonely,
9306   Boolean isRequested,
9307   Boolean isSegment,
9308   DeflineFeatureRequestListPtr rp)
9309 {
9310   Boolean rval = FALSE;
9311   SeqMgrFeatContext context;
9312   SeqFeatPtr precursor_rna;
9313     Int2 cmpval;
9314 
9315   if (isRequested) {
9316     return FALSE;
9317   }
9318   for (precursor_rna = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_preRNA, &context);
9319        precursor_rna != NULL && !rval;
9320        precursor_rna = SeqMgrGetNextFeature (bsp, precursor_rna, 0, FEATDEF_preRNA, &context)) {
9321     cmpval = SeqLocCompare (sfp->location, precursor_rna->location);
9322     if (cmpval != SLC_NO_MATCH) {
9323       rval = TRUE;
9324     }
9325   }
9326   return rval;
9327 }
9328 
9329 
ShouldRemoveGeneric(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9330 static Boolean LIBCALLBACK ShouldRemoveGeneric
9331 ( SeqFeatPtr sfp,
9332   FeatureClausePtr parent_fcp,
9333   FeatureClausePtr this_fcp,
9334   BioseqPtr bsp,
9335   Boolean isLonely,
9336   Boolean isRequested,
9337   Boolean isSegment,
9338   DeflineFeatureRequestListPtr rp)
9339 {
9340   CharPtr productname;
9341   Boolean rval;
9342 
9343   rval = FALSE;
9344   if (IsMiscRNA (sfp) && ( productname = GetProductName (sfp, bsp, rp)) != NULL)
9345   {
9346     if (StringStr (productname, "trans-spliced leader") != NULL)
9347     {
9348       rval = TRUE;
9349     }
9350     MemFree (productname);
9351   }
9352 
9353   return rval;
9354 }
9355 
9356 
IsBioseqPrecursorRNA(BioseqPtr bsp)9357 static Boolean IsBioseqPrecursorRNA (BioseqPtr bsp)
9358 {
9359   SeqDescrPtr       sdp;
9360   SeqMgrDescContext context;
9361   MolInfoPtr        mol;
9362 
9363   if (bsp == NULL) return FALSE;
9364 
9365   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
9366   if (sdp != NULL && sdp->data.ptrvalue != NULL)
9367   {
9368         mol = (MolInfoPtr) sdp->data.ptrvalue;
9369     if (mol->biomol == 2)
9370     {
9371       return TRUE;
9372     }
9373   }
9374   return FALSE;
9375 }
9376 
ShouldRemovePrecursorRNA(SeqFeatPtr sfp,FeatureClausePtr parent_fcp,FeatureClausePtr this_fcp,BioseqPtr bsp,Boolean isLonely,Boolean isRequested,Boolean isSegment,DeflineFeatureRequestListPtr rp)9377 static Boolean LIBCALLBACK ShouldRemovePrecursorRNA
9378 ( SeqFeatPtr sfp,
9379   FeatureClausePtr parent_fcp,
9380   FeatureClausePtr this_fcp,
9381   BioseqPtr bsp,
9382   Boolean isLonely,
9383   Boolean isRequested,
9384   Boolean isSegment,
9385   DeflineFeatureRequestListPtr rp)
9386 {
9387   if (!isLonely && IsBioseqPrecursorRNA(bsp) && !isRequested)
9388   {
9389     return TRUE;
9390   }
9391   else
9392   {
9393     return ShouldRemoveGeneric (sfp, parent_fcp, this_fcp, bsp, isLonely,
9394                                 isRequested, isSegment, rp);
9395   }
9396 }
9397 
9398 
/* One row of the remove_items table: a category of features that may be
 * dropped, described by a matching predicate, a removal callback and a label.
 */
typedef struct removableitemglobal {
  matchFunction  itemmatch;           /* predicate for the category, e.g. IsExon */
  ShouldRemoveFunction ShouldRemove;  /* callback for the category, e.g. ShouldRemoveExon */
  CharPtr  group_name;                /* label for the category; returned by GetRemovableItemName */
} RemovableItemGlobalData, PNTR RemovableItemGlobalPtr;
9404 
/* Local setting holding a single keep flag.
 * NOTE(review): presumably one instance per removable-item category --
 * confirm against the code that uses this type.
 */
typedef struct removableitemlocal {
/*  ButtoN  keep_request; */
  Boolean  keep;  /* NOTE(review): presumably TRUE means "keep this category" -- confirm */
} RemovableItemLocalData, PNTR RemovableItemLocalPtr;
9409 
/* Table of removable feature categories.  Each row pairs a feature-matching
 * predicate with its removal callback and a group label.
 * GetRemovableItemName indexes this table by position and bounds the index
 * with NumRemovableItems, so the number of rows here is expected to agree
 * with NumRemovableItems (defined outside this section).
 */
static RemovableItemGlobalData remove_items[] = {
  { IsExon, ShouldRemoveExon, "Exons" },
  { IsIntron, ShouldRemoveIntron, "Introns" },
  { Is5UTR, ShouldRemove5UTR, "5' UTRs" },
  { Is3UTR, ShouldRemove3UTR, "3' UTRs" },
  { IsuORF, ShouldRemoveuORF, "uORFs"},
  { IsCDS,  ShouldRemoveCDS, "CDSs" },
  { IsPromoter, ShouldRemovePromoter, "Promoters:" },
  { IsLTR, ShouldRemoveLTR, "LTRs" },
  { IsNoncodingProductFeat,  ShouldRemoveNoncodingProductFeat, "Misc feats with comments:" },
  { IsRemovableMobileElement, ShouldRemoveMobileElement, "Optional Mobile Element" },
  { IsPrecursorRNA, ShouldRemovePrecursorRNA, "Precursor RNAs" },
  { IsncRNA, ShouldRemovencRNA, "ncRNAs that overlap precursor RNAs"},
  { IsRepeatRegion, ShouldRemoveRepeatRegion, "Repeat regions" }
};
9425 
9426 
9427 //LCOV_EXCL_START
9428 //Not part of Autodef or Cleanup, used for GUI
GetRemovableItemName(Int4 i)9429 NLM_EXTERN CharPtr GetRemovableItemName (Int4 i)
9430 {
9431   if (i < 0 || i >= NumRemovableItems) {
9432     return NULL;
9433   } else {
9434     return remove_items[i].group_name;
9435   }
9436 }
9437 //LCOV_EXCL_STOP
9438 
InitFeatureRequests(DeflineFeatureRequestListPtr feature_requests)9439 NLM_EXTERN void InitFeatureRequests (
9440   DeflineFeatureRequestListPtr feature_requests
9441 )
9442 {
9443   Int4 i;
9444   for (i=0; i < NumRemovableItems; i++)
9445   {
9446     feature_requests->keep_items[i] = FALSE;
9447   }
9448   feature_requests->add_fake_promoters = TRUE;
9449   feature_requests->suppress_alt_splice_phrase = FALSE;
9450   feature_requests->remove_subfeatures = FALSE;
9451   feature_requests->feature_list_type = DEFLINE_USE_FEATURES;
9452   feature_requests->misc_feat_parse_rule = 2;
9453   feature_requests->suppress_locus_tags = FALSE;
9454   feature_requests->suppressed_feature_list = NULL;
9455   feature_requests->use_ncrna_note = FALSE;
9456   feature_requests->suppress_allele = FALSE;
9457 }
9458 
9459 
9460 //LCOV_EXCL_START
9461 //Not part of Autodef or Cleanup
FreeDeflineFeatureRequestList(DeflineFeatureRequestListPtr feature_requests)9462 NLM_EXTERN DeflineFeatureRequestListPtr FreeDeflineFeatureRequestList (DeflineFeatureRequestListPtr feature_requests)
9463 {
9464   if (feature_requests != NULL) {
9465     feature_requests->suppressed_feature_list = ValNodeFree (feature_requests->suppressed_feature_list);
9466     feature_requests = MemFree (feature_requests);
9467   }
9468   return feature_requests;
9469 }
9470 //LCOV_EXCL_STOP
9471 
9472 
/* Decides whether the feature sfp should be dropped from the definition line.
 *
 * A NULL feature is always removed.  Otherwise the first remove_items row
 * whose itemmatch accepts sfp makes the decision, receiving the matching
 * keep_items flag as its "requested" argument.  Features that match no row
 * are judged by ShouldRemoveGeneric with "requested" set to FALSE.
 */
static Boolean RemoveCondition (
  SeqFeatPtr sfp,
  FeatureClausePtr parent_fcp,
  FeatureClausePtr this_fcp,
  BioseqPtr bsp,
  Boolean isLonely,
  Boolean isSegment,
  DeflineFeatureRequestList *feature_requests
)
{
  Int4 row;

  if (sfp == NULL)
  {
    return TRUE;
  }

  for (row = 0; row < NumRemovableItems; row++)
  {
    if (!remove_items[row].itemmatch (sfp))
    {
      continue;
    }
    return remove_items[row].ShouldRemove (sfp, parent_fcp, this_fcp, bsp,
                                           isLonely,
                                           feature_requests->keep_items[row],
                                           isSegment,
                                           feature_requests);
  }

  return ShouldRemoveGeneric (sfp, parent_fcp, this_fcp, bsp, isLonely, FALSE,
                              isSegment, feature_requests);
}
9496 
/* Searches feature_list, including nested clause lists, for a clause other
 * than the node "me" that refers to gene grp.
 *
 * A clause matches when its grp is the same pointer as grp or when
 * DoGenesMatch accepts the pair.  Clauses already flagged delete_me are
 * skipped together with everything nested under them.  Returns FALSE when
 * grp is NULL.
 */
static Boolean FindOtherGeneClause
( ValNodePtr feature_list,
  ValNodePtr me,
  GeneRefPtr grp,
  Boolean    suppress_locus_tag)
{
  ValNodePtr       node;
  FeatureClausePtr candidate;

  if (grp == NULL)
  {
    return FALSE;
  }

  for (node = feature_list; node != NULL; node = node->next)
  {
    if (node == me
      || node->choice != DEFLINE_CLAUSEPLUS
      || node->data.ptrvalue == NULL)
    {
      continue;
    }

    candidate = node->data.ptrvalue;
    if (candidate->delete_me)
    {
      continue;
    }

    if (candidate->grp == grp)
    {
      return TRUE;
    }
    if (candidate->grp != NULL
      && DoGenesMatch (candidate->grp, grp, suppress_locus_tag))
    {
      return TRUE;
    }
    if (FindOtherGeneClause (candidate->featlist, me, grp, suppress_locus_tag))
    {
      return TRUE;
    }
  }
  return FALSE;
}
9528 
/* Flags gene-only clauses whose gene is already mentioned by another clause.
 *
 * feature_list       list to scan; nested clause lists are scanned recursively
 * search_list        list in which FindOtherGeneClause looks for another mention
 * delete_now         when TRUE, DeleteFeatureClauses is run on feature_list
 *                    after marking; recursive calls pass FALSE
 * suppress_locus_tag forwarded to FindOtherGeneClause
 *
 * A clause is flagged when its feature list consists of exactly one node,
 * that node is a DEFLINE_FEATLIST entry holding a gene feature, and
 * FindOtherGeneClause finds another clause for the same gene.
 */
static void RemoveGenesMentionedElsewhere
( ValNodePtr PNTR feature_list,
  ValNodePtr      search_list,
  Boolean         delete_now,
  Boolean         suppress_locus_tag)
{
  ValNodePtr vnp;
  FeatureClausePtr fcp;

  /* same guard the sibling list walkers (e.g. DeleteSubfeatures) use */
  if (feature_list == NULL) return;

  for (vnp = *feature_list; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->choice != DEFLINE_CLAUSEPLUS || vnp->data.ptrvalue == NULL)
    {
      continue;
    }
    fcp = vnp->data.ptrvalue;
    if (fcp->featlist == NULL)
    {
      continue;
    }
    /* Only a DEFLINE_FEATLIST node carries a SeqFeatPtr; a nested-clause
     * node holds a FeatureClausePtr and must not be handed to IsGene.
     */
    if ( fcp->featlist->choice == DEFLINE_FEATLIST
      && IsGene (fcp->featlist->data.ptrvalue)
      && fcp->featlist->next == NULL
      && FindOtherGeneClause ( search_list, vnp, fcp->grp, suppress_locus_tag))
    {
      fcp->delete_me = TRUE;
    }
    else
    {
      RemoveGenesMentionedElsewhere ( &(fcp->featlist), search_list, FALSE, suppress_locus_tag);
    }
  }
  if (delete_now)
  {
    DeleteFeatureClauses (feature_list);
  }
}
9564 
/* Flags a clause (and, recursively, its nested clauses) for deletion when
 * RemoveCondition rejects one of its features.
 *
 * clause      must be a DEFLINE_CLAUSEPLUS node with a clause attached;
 *             anything else is ignored
 * isLonely    whether the clause is considered to stand alone.  It is cleared
 *             locally when the clause has more than one nested clause, and
 *             again as soon as a feature that is neither a gene nor an mRNA
 *             survives, so the order of the feature list matters.
 * parent_fcp  clause that contains this one (NULL at the top level)
 */
static void MarkUnwantedFeatureClauseForRemoval (
  ValNodePtr clause,
  BioseqPtr  bsp,
  Boolean    isLonely,
  FeatureClausePtr parent_fcp,
  Boolean    isSegment,
  DeflineFeatureRequestList PNTR feature_requests
)
{
  FeatureClausePtr fcp;
  ValNodePtr       item;
  Int4             num_subclauses;
  SeqFeatPtr       sfp;

  if (clause == NULL) return;
  if (clause->choice != DEFLINE_CLAUSEPLUS) return;
  if (clause->data.ptrvalue == NULL) return;

  fcp = clause->data.ptrvalue;

  /* a clause with two or more nested clauses is never lonely */
  if (isLonely)
  {
    num_subclauses = 0;
    for (item = fcp->featlist; item != NULL; item = item->next)
    {
      if (item->choice != DEFLINE_CLAUSEPLUS) continue;
      num_subclauses++;
      if (num_subclauses > 1)
      {
        isLonely = FALSE;
        break;
      }
    }
  }

  for (item = fcp->featlist; item != NULL; item = item->next)
  {
    if (item->data.ptrvalue == NULL)
    {
      continue;
    }

    if (item->choice == DEFLINE_FEATLIST)
    {
      sfp = (SeqFeatPtr) item->data.ptrvalue;
      if (RemoveCondition (sfp, parent_fcp, fcp, bsp,
                           isLonely, isSegment, feature_requests))
      {
        fcp->delete_me = TRUE;
      }
      else if (!IsGene (sfp) && !IsmRNA (sfp))
      {
        /* a surviving "real" feature means later items have company */
        isLonely = FALSE;
      }
    }
    else if (item->choice == DEFLINE_CLAUSEPLUS)
    {
      MarkUnwantedFeatureClauseForRemoval (item, bsp, isLonely, fcp,
                                           isSegment,
                                           feature_requests);
    }
  }
}
9631 
/* Marks every unwanted top-level clause in *list and then deletes the
 * marked clauses.  A clause is treated as lonely only when it is the sole
 * node of the list.  A NULL list pointer is ignored.
 */
static void RemoveUnwantedFeatures (
  ValNodePtr PNTR list,
  BioseqPtr bsp,
  Boolean   isSegment,
  DeflineFeatureRequestList PNTR feature_requests
)
{
  ValNodePtr node;
  Boolean    single_node;

  if (list == NULL)
  {
    return;
  }

  /* with two or more nodes every node, including the last, is "not lonely" */
  single_node = (Boolean) (*list == NULL || (*list)->next == NULL);

  for (node = *list; node != NULL; node = node->next)
  {
    if (node->choice != DEFLINE_CLAUSEPLUS)
    {
      continue;
    }
    MarkUnwantedFeatureClauseForRemoval (node, bsp, single_node, NULL,
                                         isSegment, feature_requests);
  }
  DeleteFeatureClauses (list);
}
9657 
IsFeatureInSelectionList(SeqFeatPtr sfp,ValNodePtr feat_list)9658 static Boolean IsFeatureInSelectionList (SeqFeatPtr sfp, ValNodePtr feat_list)
9659 {
9660   ValNodePtr       vnp;
9661 
9662   if (sfp == NULL || feat_list == NULL)
9663   {
9664     return FALSE;
9665   }
9666 
9667   for (vnp = feat_list; vnp != NULL && sfp->idx.subtype != vnp->choice; vnp = vnp->next)
9668   {
9669   }
9670   if (vnp == NULL)
9671   {
9672     return FALSE;
9673   }
9674   else
9675   {
9676     return TRUE;
9677   }
9678 }
9679 
/* Flags a clause for deletion when any feature directly in its list has a
 * subtype found in suppressed_feature_list, then applies the same test to
 * each nested clause.  Nodes that are not DEFLINE_CLAUSEPLUS, or that carry
 * no clause, are ignored.
 */
static void MarkSuppressedFeatureClauseForRemoval (
  ValNodePtr clause,
  ValNodePtr suppressed_feature_list
)
{
  FeatureClausePtr fcp;
  ValNodePtr       item;

  if (clause == NULL) return;
  if (clause->choice != DEFLINE_CLAUSEPLUS) return;
  if (clause->data.ptrvalue == NULL) return;

  fcp = clause->data.ptrvalue;

  for (item = fcp->featlist; item != NULL; item = item->next)
  {
    if (item->data.ptrvalue == NULL)
    {
      continue;
    }

    if (item->choice == DEFLINE_FEATLIST)
    {
      if (IsFeatureInSelectionList ((SeqFeatPtr) item->data.ptrvalue,
                                    suppressed_feature_list))
      {
        fcp->delete_me = TRUE;
      }
    }
    else if (item->choice == DEFLINE_CLAUSEPLUS)
    {
      MarkSuppressedFeatureClauseForRemoval (item, suppressed_feature_list);
    }
  }
}
9720 
/* Marks and then deletes every clause of *list that contains a feature
 * whose subtype appears in suppressed_feature_list.  Does nothing when the
 * list pointer, the list itself, or the suppression list is NULL.
 */
static void RemoveSuppressedFeatures (ValNodePtr PNTR list,
                                      ValNodePtr suppressed_feature_list)
{
  ValNodePtr node;

  if (list == NULL) return;
  if (*list == NULL) return;
  if (suppressed_feature_list == NULL) return;

  for (node = *list; node != NULL; node = node->next)
  {
    if (node->choice != DEFLINE_CLAUSEPLUS)
    {
      continue;
    }
    MarkSuppressedFeatureClauseForRemoval (node, suppressed_feature_list);
  }
  DeleteFeatureClauses (list);
}
9740 
IsMasterClause(SeqFeatPtr sfp)9741 static Boolean LIBCALLBACK IsMasterClause (
9742   SeqFeatPtr sfp
9743 )
9744 {
9745   if ( IsMobileElement (sfp)) return TRUE;
9746   return FALSE;
9747 }
9748 
/* Flags the nested clauses of every master clause for deletion.
 *
 * A clause whose first list node is a DEFLINE_FEATLIST entry accepted by
 * IsMasterClause has all of its nested clauses flagged delete_me.  Any other
 * clause is searched recursively.  With delete_now set, the flagged clauses
 * of feature_list are removed before returning; recursive calls pass FALSE.
 */
static void DeleteSubfeatures (
  ValNodePtr PNTR feature_list,
  Boolean         delete_now
)
{
  ValNodePtr       outer, inner;
  FeatureClausePtr outer_fcp, inner_fcp;

  if (feature_list == NULL) return;

  for (outer = *feature_list; outer != NULL; outer = outer->next)
  {
    if (outer->choice != DEFLINE_CLAUSEPLUS)
    {
      continue;
    }
    outer_fcp = outer->data.ptrvalue;
    if (outer_fcp == NULL || outer_fcp->featlist == NULL)
    {
      continue;
    }

    if (outer_fcp->featlist->choice != DEFLINE_FEATLIST
      || !IsMasterClause (outer_fcp->featlist->data.ptrvalue))
    {
      DeleteSubfeatures (&(outer_fcp->featlist), FALSE);
      continue;
    }

    /* master clause: everything nested after the head feature goes */
    for (inner = outer_fcp->featlist->next; inner != NULL; inner = inner->next)
    {
      if (inner->choice != DEFLINE_CLAUSEPLUS)
      {
        continue;
      }
      inner_fcp = inner->data.ptrvalue;
      if (inner_fcp != NULL)
      {
        inner_fcp->delete_me = TRUE;
      }
    }
  }

  if (delete_now)
  {
    DeleteFeatureClauses (feature_list);
  }
}
9790 
/* Flags the nested clauses of every operon or gene-cluster clause.
 *
 * A clause whose first list node is a DEFLINE_FEATLIST entry accepted by
 * IsOperon or IsGeneCluster has all of its nested clauses flagged delete_me.
 * Any other clause is searched recursively.  With delete_now set, the
 * flagged clauses of feature_list are removed before returning.
 */
static void DeleteOperonAndGeneClusterSubfeatures (
  ValNodePtr PNTR feature_list,
  Boolean         delete_now
)
{
  ValNodePtr       outer, inner;
  FeatureClausePtr outer_fcp, inner_fcp;

  if (feature_list == NULL) return;

  for (outer = *feature_list; outer != NULL; outer = outer->next)
  {
    if (outer->choice != DEFLINE_CLAUSEPLUS)
    {
      continue;
    }
    outer_fcp = outer->data.ptrvalue;
    if (outer_fcp == NULL || outer_fcp->featlist == NULL)
    {
      continue;
    }

    if (outer_fcp->featlist->choice != DEFLINE_FEATLIST
      || (!IsOperon (outer_fcp->featlist->data.ptrvalue)
          && !IsGeneCluster (outer_fcp->featlist->data.ptrvalue)))
    {
      DeleteOperonAndGeneClusterSubfeatures (&(outer_fcp->featlist), FALSE);
      continue;
    }

    /* operon / gene cluster: drop every clause nested under it */
    for (inner = outer_fcp->featlist->next; inner != NULL; inner = inner->next)
    {
      if (inner->choice != DEFLINE_CLAUSEPLUS)
      {
        continue;
      }
      inner_fcp = inner->data.ptrvalue;
      if (inner_fcp != NULL)
      {
        inner_fcp->delete_me = TRUE;
      }
    }
  }

  if (delete_now)
  {
    DeleteFeatureClauses (feature_list);
  }
}
9833 
/* Retags as DEFLINE_REMOVEFEAT every DEFLINE_FEATLIST node, at any nesting
 * depth below list, whose feature is accepted by itemmatch.  The nodes stay
 * in the list; only their choice value changes.
 */
static void RemoveFeats (
  ValNodePtr    list,
  matchFunction itemmatch
)
{
  ValNodePtr       node;
  FeatureClausePtr nested;

  for (node = list; node != NULL; node = node->next)
  {
    if (node->choice == DEFLINE_FEATLIST)
    {
      if (itemmatch (node->data.ptrvalue))
      {
        node->choice = DEFLINE_REMOVEFEAT;
      }
    }
    else if (node->choice == DEFLINE_CLAUSEPLUS)
    {
      nested = node->data.ptrvalue;
      if (nested != NULL)
      {
        RemoveFeats (nested->featlist, itemmatch);
      }
    }
  }
}
9858 
9859 /* A clause is "tall" if it has only one clause at any level */
IsClauseTall(FeatureClausePtr fcp)9860 static Boolean IsClauseTall (
9861   FeatureClausePtr fcp
9862 )
9863 {
9864   ValNodePtr featlist;
9865   Int4       num_clauses;
9866   FeatureClausePtr subclause;
9867 
9868   num_clauses = 0;
9869   if (fcp == NULL) return FALSE;
9870   subclause = NULL;
9871   if (fcp->featlist == NULL) return FALSE;
9872   for (featlist = fcp->featlist;
9873        featlist != NULL;
9874        featlist = featlist->next)
9875   {
9876     if (featlist->choice == DEFLINE_CLAUSEPLUS)
9877     {
9878       subclause = featlist->data.ptrvalue;
9879       if (subclause == NULL || ! IsClauseTall (subclause))
9880       {
9881         return FALSE;
9882       }
9883       num_clauses ++;
9884       if (num_clauses > 1) return FALSE;
9885     }
9886   }
9887   if (subclause == NULL || ! subclause->feature_label_data.is_typeword_first)
9888   {
9889     return TRUE;
9890   }
9891   return FALSE;
9892 }
9893 
/* Collapses a tall clause and its first nested clause into one clause.
 *
 * The parent (fcp) receives:
 *   - a new description built from its own typeword/description followed by
 *     the nested clause's description (separated by ", ", or by " " alone when
 *     the parent's head feature is a mobile element or endogenous virus),
 *   - the nested clause's interval and typeword (ownership is transferred and
 *     the nested clause's pointers are cleared),
 *   - the nested clause's last leading feature node, spliced onto the front
 *     of the parent's list.
 * The nested clause is then flagged delete_me.
 *
 * Nothing happens when fcp is NULL, has no feature list, or has no nested
 * clause.  If the new description cannot be allocated the function returns
 * after the feature splice but before any label is changed.
 */
static void SmashOneTallClause (
  FeatureClausePtr fcp
)
{
  FeatureClausePtr subclause;
  ValNodePtr       featlist;
  ValNodePtr       subclause_firstclause;
  CharPtr          new_description;
  Int4             new_description_len;
  SeqFeatPtr       main_feat;

  if (fcp == NULL || fcp->featlist == NULL) return;

  /* remember the parent's head feature; it decides the separator below */
  featlist = fcp->featlist;
  if (featlist->choice == DEFLINE_FEATLIST)
  {
    main_feat = fcp->featlist->data.ptrvalue;
  }
  else
  {
    main_feat = NULL;
  }

  /* locate the first nested clause */
  while (featlist != NULL && featlist->choice != DEFLINE_CLAUSEPLUS)
  {
    featlist = featlist->next;
  }
  if (featlist == NULL) return;
  subclause = featlist->data.ptrvalue;
  if (subclause == NULL) return;

  /* move subclause feats to top of list */
  /* NOTE(review): only the LAST of the subclause's leading feature nodes is
   * linked in front of the parent's list.  When the subclause starts with
   * more than one feature node, the earlier nodes end up reachable from
   * neither list.  Left unchanged because altering which feature heads the
   * merged clause could change generated definition lines - confirm intent.
   */
  if (subclause->featlist != NULL
    && subclause->featlist->choice == DEFLINE_FEATLIST)
  {
    while (subclause->featlist != NULL
           && subclause->featlist->next != NULL
           && subclause->featlist->next->choice == DEFLINE_FEATLIST)
    {
      subclause->featlist = subclause->featlist->next;
    }
    if (subclause->featlist != NULL)
    {
      subclause_firstclause = subclause->featlist->next;
      subclause->featlist->next = fcp->featlist;
      fcp->featlist = subclause->featlist;
      subclause->featlist = subclause_firstclause;
    }
  }

  /* create new description */
  /* +4 covers the optional space after the typeword, the optional comma,
   * the separating space and the terminating NUL
   */
  new_description_len = StringLen (subclause->feature_label_data.description)
                   + StringLen (fcp->feature_label_data.description)
                   + StringLen (fcp->feature_label_data.typeword)
                   + 4;
  new_description = (CharPtr) MemNew (new_description_len);
  if (new_description == NULL) return;
  new_description [0] = 0;
  if ( fcp->feature_label_data.is_typeword_first)
  {
    StringCat (new_description, fcp->feature_label_data.typeword);
    StringCat (new_description, " ");
  }
  StringCat (new_description, fcp->feature_label_data.description);
  if ( ! fcp->feature_label_data.is_typeword_first)
  {
    StringCat (new_description, fcp->feature_label_data.typeword);
  }

  if ( ! IsMobileElement (main_feat)
    && ! IsEndogenousVirusSourceFeature (main_feat))
  {
    StringCat (new_description, ",");
  }
  StringCat (new_description, " ");
  StringCat (new_description, subclause->feature_label_data.description);

  if (fcp->feature_label_data.description != NULL)
  {
    MemFree (fcp->feature_label_data.description);
  }
  fcp->feature_label_data.description = new_description;

  /* move interval up */
  if (fcp->interval != NULL)
  {
    MemFree (fcp->interval);
  }
  fcp->interval = subclause->interval;
  subclause->interval = NULL;

  /* move typeword up */
  /* The parent's old typeword has been folded into the new description and
   * is about to be overwritten, so release it first to avoid leaking it.
   */
  if (fcp->feature_label_data.typeword != NULL
    && fcp->feature_label_data.typeword != subclause->feature_label_data.typeword)
  {
    MemFree (fcp->feature_label_data.typeword);
  }
  fcp->feature_label_data.typeword = subclause->feature_label_data.typeword;
  fcp->feature_label_data.is_typeword_first =
               subclause->feature_label_data.is_typeword_first;
  subclause->feature_label_data.typeword = NULL;
  subclause->delete_me = TRUE;

}
9996 
9997 
/* Walks clause_list and collapses each tall clause with SmashOneTallClause.
 * Clauses that are not tall are searched recursively for tall clauses at a
 * deeper level.  With delete_now set, clauses flagged during the walk are
 * removed from clause_list before returning; recursive calls pass FALSE.
 */
static void SmashTallClauses (
  ValNodePtr PNTR clause_list,
  Boolean         delete_now
)
{
  ValNodePtr       node;
  FeatureClausePtr fcp;

  if (clause_list == NULL)
  {
    return;
  }

  for (node = *clause_list; node != NULL; node = node->next)
  {
    if (node->choice == DEFLINE_CLAUSEPLUS && node->data.ptrvalue != NULL)
    {
      fcp = node->data.ptrvalue;
      if (!IsClauseTall (fcp))
      {
        SmashTallClauses (&(fcp->featlist), FALSE);
      }
      else
      {
        SmashOneTallClause (fcp);
      }
    }
  }

  if (delete_now)
  {
    DeleteFeatureClauses (clause_list);
  }
}
10028 
/* Retags every CDS feature node except the last one encountered as
 * DEFLINE_REMOVEFEAT, descending into nested clauses in list order.
 *
 * last_cds is the most recent CDS node seen before this call (NULL when
 * none).  The return value is the most recent CDS node after the walk, so
 * the state threads through the recursion.
 */
static ValNodePtr RemoveAllButLastCDS (
  ValNodePtr list,
  ValNodePtr last_cds
)
{
  ValNodePtr       node;
  FeatureClausePtr nested;

  for (node = list; node != NULL; node = node->next)
  {
    if (node->choice == DEFLINE_FEATLIST)
    {
      if (!IsCDS (node->data.ptrvalue))
      {
        continue;
      }
      /* a newer CDS supersedes the previously remembered one */
      if (last_cds != NULL)
      {
        last_cds->choice = DEFLINE_REMOVEFEAT;
      }
      last_cds = node;
    }
    else if (node->choice == DEFLINE_CLAUSEPLUS)
    {
      nested = node->data.ptrvalue;
      if (nested != NULL)
      {
        last_cds = RemoveAllButLastCDS (nested->featlist, last_cds);
      }
    }
  }
  return last_cds;
}
10057 
/* Decides whether clause fcp may be merged into the preceding clause
 * last_fcp.  All of the following must hold:
 *   - the two descriptions compare equal with StringCmp,
 *   - both clauses have the same partialness,
 *   - both or neither are alternatively spliced,
 *   - both feature lists start with a DEFLINE_FEATLIST node,
 *   - a CDS is only paired with a CDS or a gene,
 *   - an exon is only paired with an exon, an intron only with an intron.
 */
static Boolean OkToConsolidate (
  CharPtr last_desc,
  CharPtr new_desc,
  Boolean last_partial,
  Boolean new_partial,
  FeatureClausePtr last_fcp,
  FeatureClausePtr fcp
)
{
  SeqFeatPtr cur_feat;
  SeqFeatPtr prev_feat;

  if (StringCmp (last_desc, new_desc) != 0) return FALSE;
  if (new_partial != last_partial) return FALSE;

  /* exactly one of the two being alternatively spliced blocks the merge */
  if ((!fcp->is_alt_spliced) != (!last_fcp->is_alt_spliced)) return FALSE;

  if (fcp->featlist == NULL || last_fcp->featlist == NULL) return FALSE;
  if (fcp->featlist->choice != DEFLINE_FEATLIST
    || last_fcp->featlist->choice != DEFLINE_FEATLIST)
  {
    return FALSE;
  }

  cur_feat = fcp->featlist->data.ptrvalue;
  prev_feat = last_fcp->featlist->data.ptrvalue;

  if (IsCDS (cur_feat))
  {
    if (!IsCDS (prev_feat) && !IsGene (prev_feat)) return FALSE;
  }
  else if (!IsGene (cur_feat) && IsCDS (prev_feat))
  {
    return FALSE;
  }

  if ((!IsExon (cur_feat)) != (!IsExon (prev_feat))) return FALSE;
  if ((!IsIntron (cur_feat)) != (!IsIntron (prev_feat))) return FALSE;

  return TRUE;
}
10095 
/* Drops gene feature nodes that are redundant in a consolidated clause.
 *
 * Only the leading run of fcp->featlist is considered: consecutive
 * DEFLINE_FEATLIST nodes with a non-NULL feature, starting at the head.
 * When that run holds at least one gene and at least one non-gene feature,
 * every gene node in the run is unlinked and released with FreeListElement.
 * Runs made only of genes, or only of non-genes, are left untouched.
 *
 * The earlier single-pass version never updated its "previous node" pointer.
 * Removing a gene that was not at the head therefore reset fcp->featlist to
 * the node after it, dropping every earlier node from the clause (for
 * example gene, CDS, gene, CDS ended as just the last CDS).  It also skipped
 * the pair right after a head removal, so gene, CDS, gene kept the trailing
 * gene.  Unlinking through a pointer-to-link avoids both problems.
 */
static void RemoveRedundantGeneFeatureFromConsolidatedClause (
  FeatureClausePtr fcp
)
{
  ValNodePtr PNTR link;
  ValNodePtr      node;
  Boolean         has_gene;
  Boolean         has_other;

  if (fcp == NULL) return;

  /* pass 1: what kinds of features does the leading run contain? */
  has_gene = FALSE;
  has_other = FALSE;
  for (node = fcp->featlist;
       node != NULL
         && node->choice == DEFLINE_FEATLIST
         && node->data.ptrvalue != NULL;
       node = node->next)
  {
    if (IsGene (node->data.ptrvalue))
    {
      has_gene = TRUE;
    }
    else
    {
      has_other = TRUE;
    }
  }
  if (!has_gene || !has_other) return;

  /* pass 2: unlink every gene node of the run.  "link" always addresses the
   * pointer that leads to the current node, so head and interior removals
   * are handled identically and nothing is orphaned.
   */
  link = &(fcp->featlist);
  while ((node = *link) != NULL
         && node->choice == DEFLINE_FEATLIST
         && node->data.ptrvalue != NULL)
  {
    if (IsGene (node->data.ptrvalue))
    {
      *link = node->next;
      node->next = NULL;   /* detach so only this node is released */
      FreeListElement (node);
    }
    else
    {
      link = &(node->next);
    }
  }
}
10143 
/* Appends "s" to a clause description.
 *
 * Nothing is changed when the clause or its description is NULL, when the
 * description starts with "tRNA-", when the clause has a typeword that
 * contains text, or when the new string cannot be allocated.  On success the
 * old description is freed and replaced.
 */
static void PluralizeConsolidatedClauseDescription (
  FeatureClausePtr fcp
)
{
  CharPtr singular;
  CharPtr plural;

  /* prevent crash */
  if (fcp == NULL) return;
  singular = fcp->feature_label_data.description;
  if (singular == NULL) return;

  /* tRNA names are never pluralized */
  if (StringNCmp (singular, "tRNA-", 5) == 0) return;

  /* a typeword with text means no pluralization here */
  if (fcp->feature_label_data.typeword != NULL
    && !StringHasNoText (fcp->feature_label_data.typeword))
  {
    return;
  }

  /* room for the original text, the added "s" and the terminator */
  plural = MemNew (StringLen (singular) + 2);
  if (plural == NULL) return;

  StringCpy (plural, singular);
  StringCat (plural, "s");

  MemFree (singular);
  fcp->feature_label_data.description = plural;
}
10167 
/* Merges neighbouring clauses that describe the same thing.
 *
 * Only DEFLINE_CLAUSEPLUS nodes whose feature list starts with a
 * DEFLINE_FEATLIST node take part; nested lists are consolidated first.  The
 * walk keeps an "anchor" clause.  Each later clause accepted by
 * OkToConsolidate is folded into the anchor: its subclauses are moved over,
 * redundant gene features are dropped, the locations are merged, the
 * anchor's interval text is regenerated, and the absorbed clause is flagged
 * delete_me.  If the two interval strings were equal before regeneration the
 * anchor is also flagged make_plural.  A clause that is rejected becomes the
 * new anchor.
 *
 * With delete_now set, flagged clauses are removed from *list at the end;
 * recursive calls pass FALSE.
 */
static void ConsolidateClauses (
  ValNodePtr PNTR list,
  BioseqPtr  bsp,
  Uint1      biomol,
  Boolean    delete_now,
  DeflineFeatureRequestListPtr rp)
{
  ValNodePtr       node;
  FeatureClausePtr fcp;
  FeatureClausePtr anchor = NULL;
  CharPtr          anchor_desc = NULL;
  CharPtr          cur_desc;
  Boolean          anchor_partial = FALSE;
  Boolean          cur_partial;
  Boolean          partial3, partial5;
  SeqLocPtr        merged_loc;

  if (list == NULL || *list == NULL) return;

  for (node = *list; node != NULL; node = node->next)
  {
    if (node->choice != DEFLINE_CLAUSEPLUS) continue;
    fcp = node->data.ptrvalue;
    if (fcp == NULL
      || fcp->featlist == NULL
      || fcp->featlist->choice != DEFLINE_FEATLIST)
    {
      continue;
    }

    /* inner levels first */
    ConsolidateClauses (&(fcp->featlist), bsp, biomol, FALSE, rp);

    /* private copy of this clause's description, plus its partialness */
    if (fcp->feature_label_data.description != NULL)
    {
      cur_desc = StringSave (fcp->feature_label_data.description);
    }
    else
    {
      cur_desc = GetGeneProtDescription (fcp, bsp, rp);
    }
    CheckSeqLocForPartial (fcp->slp, &partial5, &partial3);
    cur_partial = (Boolean) (partial5 || partial3);

    if (anchor != NULL
      && OkToConsolidate (anchor_desc, cur_desc,
                          anchor_partial, cur_partial,
                          anchor, fcp))
    {
      /* identical descriptions: fold this clause into the anchor */
      MoveSubclauses (anchor, fcp);
      RemoveRedundantGeneFeatureFromConsolidatedClause (anchor);
      fcp->featlist = NULL;
      fcp->delete_me = TRUE;

      merged_loc = SeqLocMerge (bsp, anchor->slp, fcp->slp,
                                FALSE, TRUE, FALSE);
      anchor->slp = SeqLocFree (anchor->slp);
      anchor->slp = merged_loc;

      /* truly identical clauses (same interval text, not merely the same
       * "prefix") get a plural description later on
       */
      if (StringCmp (anchor->interval, fcp->interval) == 0)
      {
        anchor->make_plural = TRUE;
/*          PluralizeConsolidatedClauseDescription (anchor); */
      }

      /* the interval text must reflect the merged location */
      if (anchor->interval != NULL)
      {
        MemFree (anchor->interval);
      }
      anchor->interval = GetGenericInterval (anchor, biomol, bsp, rp);

      MemFree (cur_desc);
      continue;
    }

    /* first candidate, or not mergeable: this clause is the new anchor */
    MemFree (anchor_desc);
    anchor_desc = cur_desc;
    anchor = fcp;
    anchor_partial = cur_partial;
  }

  anchor_desc = MemFree (anchor_desc);
  if (delete_now)
  {
    DeleteFeatureClauses (list);
  }
}
10282 
/* Gathers clauses whose gene/protein description is "unknown" into a single
 * summary clause.
 *
 * Every eligible clause in *clause_list (not already a summary clause, no
 * gene reference, feature list headed by a DEFLINE_FEATLIST node) whose
 * description from GetGeneProtDescription equals "unknown" is flagged
 * delete_me.  Its head feature is recorded in one new clause appended to
 * *clause_list and flagged is_unknown.  Nested lists are processed first.
 * Afterwards the flagged clauses are deleted; when more than one was found
 * the summary clause gets the typeword "genes".
 *
 * Allocation failures stop the scan; whatever was collected up to that point
 * is still cleaned up, and the description string is always released.
 */
static void CountUnknownGenes (
  ValNodePtr PNTR clause_list,
  BioseqPtr       bsp,
  DeflineFeatureRequestListPtr rp)
{
  FeatureClausePtr fcp, new_fcp;
  ValNodePtr vnp, new_vnp, tail;
  CharPtr gene_name;
  Int4 num_unknown_genes;
  Boolean failed;

  if (clause_list == NULL) return;

  num_unknown_genes = 0;
  new_fcp = NULL;
  failed = FALSE;
  for (vnp = *clause_list; vnp != NULL && !failed; vnp = vnp->next)
  {
    if (vnp->choice != DEFLINE_CLAUSEPLUS
      || (fcp = vnp->data.ptrvalue) == NULL
      || fcp->is_unknown
      || fcp->grp != NULL)
    {
      continue;
    }

    CountUnknownGenes (&(fcp->featlist), bsp, rp);
    gene_name = GetGeneProtDescription (fcp, bsp, rp);
    if (StringCmp (gene_name, "unknown") == 0
      && fcp->featlist != NULL
      && fcp->featlist->choice == DEFLINE_FEATLIST)
    {
      if (new_fcp == NULL)
      {
        /* first unknown gene: create the summary clause at the list tail */
        new_vnp = ValNodeNew (*clause_list);
        if (new_vnp != NULL)
        {
          new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue,
                                      bsp, rp);
        }
        if (new_vnp == NULL)
        {
          failed = TRUE;
        }
        else if (new_fcp == NULL)
        {
          /* take the blank node back out.  The loop is running, so the list
           * already had a node and the new one cannot be the head.
           */
          for (tail = *clause_list;
               tail != NULL && tail->next != new_vnp;
               tail = tail->next)
          {
          }
          if (tail != NULL)
          {
            tail->next = NULL;
            ValNodeFree (new_vnp);
          }
          failed = TRUE;
        }
        else
        {
          new_fcp->is_unknown = TRUE;
          new_vnp->choice = DEFLINE_CLAUSEPLUS;
          new_vnp->data.ptrvalue = new_fcp;
        }
      }
      else
      {
        /* later unknown genes only add their feature to the summary clause */
        new_vnp = ValNodeNew (new_fcp->featlist);
        if (new_vnp == NULL)
        {
          failed = TRUE;
        }
        else
        {
          if (new_fcp->featlist == NULL)
          {
            /* ValNodeNew (NULL) returns an unlinked node; adopt it */
            new_fcp->featlist = new_vnp;
          }
          new_vnp->choice = DEFLINE_FEATLIST;
          new_vnp->data.ptrvalue = fcp->featlist->data.ptrvalue;
        }
      }
      if (!failed)
      {
        num_unknown_genes ++;
        fcp->delete_me = TRUE;
      }
    }
    gene_name = MemFree (gene_name);
  }

  if (num_unknown_genes > 0)
  {
    DeleteFeatureClauses (clause_list);
    if (num_unknown_genes > 1)
    {
      new_fcp->feature_label_data.typeword = StringSave ("genes");
    }
  }
}
10344 
ReplaceDefinitionLine(SeqEntryPtr sep,CharPtr defline)10345 NLM_EXTERN void ReplaceDefinitionLine (
10346   SeqEntryPtr sep,
10347   CharPtr defline
10348 )
10349 {
10350   ValNodePtr ttl;
10351   if (sep == NULL || defline == NULL) return;
10352 
10353   ttl = SeqEntryGetSeqDescr (sep, Seq_descr_title, NULL);
10354   if (ttl == NULL)
10355     ttl = CreateNewDescriptor (sep, Seq_descr_title);
10356   if (ttl != NULL) {
10357     MemFree (ttl->data.ptrvalue);
10358     ttl->data.ptrvalue = defline;
10359     defline = NULL;
10360   }
10361   MemFree (defline);
10362 }
10363 
NewFeatureClause(SeqFeatPtr sfp,BioseqPtr bsp,DeflineFeatureRequestListPtr rp)10364 FeatureClausePtr NewFeatureClause
10365 ( SeqFeatPtr sfp,
10366   BioseqPtr  bsp,
10367   DeflineFeatureRequestListPtr rp)
10368 {
10369   FeatureClausePtr fcp;
10370   Boolean          partial5, partial3;
10371 
10372   fcp = (FeatureClausePtr) MemNew (sizeof (FeatureClauseData));
10373   if (fcp == NULL) return NULL;
10374 
10375   fcp->feature_label_data.typeword = NULL;
10376   fcp->feature_label_data.description = NULL;
10377   fcp->feature_label_data.productname = NULL;
10378   fcp->feature_label_data.pluralizable = FALSE;
10379   fcp->feature_label_data.is_typeword_first = FALSE;
10380   fcp->allelename = NULL;
10381   fcp->interval = NULL;
10382   fcp->featlist = NULL;
10383   fcp->delete_me = FALSE;
10384   fcp->clause_info_only = FALSE;
10385   fcp->make_plural = FALSE;
10386   fcp->is_unknown = FALSE;
10387   fcp->grp = NULL;
10388   if (sfp == NULL) return fcp;
10389   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
10390   fcp->slp = SeqLocMerge (bsp, sfp->location, NULL,
10391                                  FALSE, TRUE, FALSE);
10392   SetSeqLocPartial (fcp->slp, partial5, partial3);
10393 
10394   if (sfp->data.choice == SEQFEAT_GENE)
10395   {
10396     fcp->grp = sfp->data.value.ptrvalue;
10397   }
10398   else
10399   {
10400     fcp->grp = SeqMgrGetGeneXref (sfp);
10401   }
10402   if (( IsCDS (sfp) || IsExon (sfp) || IsNoncodingProductFeat (sfp))
10403     && StringStr (sfp->comment, "alternatively spliced") != NULL)
10404   {
10405     fcp->is_alt_spliced = TRUE;
10406   }
10407   else
10408   {
10409     fcp->is_alt_spliced = FALSE;
10410   }
10411   if (IsCDS (sfp))
10412   {
10413     fcp->feature_label_data.productname = GetProductName (sfp, bsp, rp);
10414   }
10415   fcp->featlist = ValNodeNew (NULL);
10416   if (fcp->featlist == NULL)
10417   {
10418     MemFree (fcp);
10419     return NULL;
10420   }
10421 
10422   fcp->featlist->data.ptrvalue = sfp;
10423   fcp->featlist->choice = DEFLINE_FEATLIST;
10424 
10425   return fcp;
10426 }
10427 
GetFeatureList(BioseqPtr bsp,DeflineFeatureRequestListPtr rp)10428 static ValNodePtr GetFeatureList (BioseqPtr bsp, DeflineFeatureRequestListPtr rp)
10429 {
10430   ValNodePtr        head, vnp;
10431   SeqFeatPtr        sfp;
10432   FeatureClausePtr  fcp;
10433   SeqMgrFeatContext fcontext;
10434 
10435   if (bsp == NULL) return NULL;
10436 
10437   /* get list of all recognized features */
10438   head = NULL;
10439   sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
10440   while (sfp != NULL)
10441   {
10442     if (IsRecognizedFeature (sfp))
10443     {
10444       fcp = NewFeatureClause (sfp, bsp, rp);
10445       if (fcp == NULL) return NULL;
10446       fcp->numivals = fcontext.numivals;
10447       fcp->ivals = fcontext.ivals;
10448       vnp = ValNodeNew (head);
10449       if (head == NULL) head = vnp;
10450       if (vnp == NULL) return NULL;
10451       vnp->data.ptrvalue = fcp;
10452       vnp->choice = DEFLINE_CLAUSEPLUS;
10453     }
10454     sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext);
10455   }
10456   return head;
10457 }
10458 
10459 static void ExtractSegmentClauses (
10460   ValNodePtr segment_features,
10461   ValNodePtr parent_features,
10462   ValNodePtr PNTR segment_clauses
10463 );
10464 
FeatureIsOnSegment(SeqFeatPtr sfp,ValNodePtr segment_features)10465 static Boolean FeatureIsOnSegment (
10466   SeqFeatPtr sfp,
10467   ValNodePtr segment_features
10468 )
10469 {
10470   ValNodePtr vnp, featclause;
10471   FeatureClausePtr fcp;
10472 
10473   for (vnp = segment_features; vnp != NULL; vnp = vnp->next)
10474   {
10475     fcp = vnp->data.ptrvalue;
10476     if (fcp != NULL)
10477     {
10478       for (featclause = fcp->featlist;
10479            featclause != NULL;
10480            featclause = featclause->next)
10481       {
10482         if (featclause->data.ptrvalue == sfp) return TRUE;
10483       }
10484     }
10485   }
10486   return FALSE;
10487 }
10488 
10489 //LCOV_EXCL_START
10490 //Segsets no longer supported
FeatureClauseIsOnSegment(FeatureClausePtr fcp,ValNodePtr segment_features)10491 static Boolean FeatureClauseIsOnSegment (
10492   FeatureClausePtr fcp,
10493   ValNodePtr segment_features
10494 )
10495 {
10496   ValNodePtr vnp;
10497 
10498   if (fcp == NULL || fcp->featlist == NULL) return FALSE;
10499   for (vnp = fcp->featlist; vnp != NULL; vnp = vnp->next)
10500   {
10501     if (vnp->choice == DEFLINE_FEATLIST
10502       && FeatureIsOnSegment (vnp->data.ptrvalue, segment_features))
10503     {
10504       return TRUE;
10505     }
10506     else if (vnp->choice == DEFLINE_CLAUSEPLUS)
10507     {
10508       if (FeatureClauseIsOnSegment (vnp->data.ptrvalue, segment_features))
10509       {
10510         return TRUE;
10511       }
10512     }
10513   }
10514   return FALSE;
10515 }
10516 
10517 //Segsets no longer supported
/* Makes a copy of clause fcp restricted to what lies on a segment.
 *
 * Label strings, allele name, interval text and the location are duplicated;
 * the gene reference pointer and the feature pointers in the featlist are
 * shared with the original.  Every DEFLINE_FEATLIST entry is copied, while a
 * DEFLINE_CLAUSEPLUS entry is copied (recursively) only if segment_features
 * is NULL or the subclause has a feature on the segment.
 *
 * The copy's clause_info_only flag is FALSE when at least one directly listed
 * feature is on the segment (or segment_features is NULL), TRUE otherwise.
 *
 * Returns the copy, or NULL if an allocation fails (the partially built copy
 * is not released in that case).
 */
static FeatureClausePtr CopyMatchingClauses (
  FeatureClausePtr fcp,
  ValNodePtr segment_features
)
{
  FeatureClausePtr copy;
  FeatureClausePtr nested;
  ValNodePtr       src;
  ValNodePtr       added;
  Boolean          keep_all;
  Boolean          any_on_segment = FALSE;
  Boolean          partial5, partial3;

  copy = (FeatureClausePtr) MemNew (sizeof (FeatureClauseData));
  if (copy == NULL)
  {
    return NULL;
  }

  /* label data */
  copy->feature_label_data.typeword =
    StringSave (fcp->feature_label_data.typeword);
  copy->feature_label_data.description =
    StringSave (fcp->feature_label_data.description);
  copy->feature_label_data.productname =
    StringSave (fcp->feature_label_data.productname);
  copy->feature_label_data.pluralizable =
    fcp->feature_label_data.pluralizable;
  copy->feature_label_data.is_typeword_first =
    fcp->feature_label_data.is_typeword_first;

  /* remaining scalar and string members */
  copy->allelename = StringSave (fcp->allelename);
  copy->interval = StringSave (fcp->interval);
  copy->is_alt_spliced = fcp->is_alt_spliced;
  copy->has_mrna = fcp->has_mrna;
  copy->grp = fcp->grp;
  copy->delete_me = FALSE;
  copy->featlist = NULL;

  /* deep copy of the location, re-applying the partial flags afterwards */
  CheckSeqLocForPartial (fcp->slp, &partial5, &partial3);
  copy->slp = (SeqLocPtr) AsnIoMemCopy (fcp->slp, (AsnReadFunc) SeqLocAsnRead,
                                        (AsnWriteFunc) SeqLocAsnWrite);
  SetSeqLocPartial (copy->slp, partial5, partial3);

  keep_all = (Boolean) (segment_features == NULL);

  for (src = fcp->featlist; src != NULL; src = src->next)
  {
    added = NULL;

    if (src->choice == DEFLINE_FEATLIST)
    {
      /* features are always carried over: some are present only to supply
       * product and gene info even if they are not actually on the segment
       */
      added = ValNodeNew (copy->featlist);
      if (added == NULL) return NULL;
      added->data.ptrvalue = src->data.ptrvalue;
      added->choice = DEFLINE_FEATLIST;

      if (keep_all
          || FeatureIsOnSegment (added->data.ptrvalue, segment_features))
      {
        any_on_segment = TRUE;
      }
    }
    else if (src->choice == DEFLINE_CLAUSEPLUS)
    {
      nested = src->data.ptrvalue;
      if (keep_all || FeatureClauseIsOnSegment (nested, segment_features))
      {
        added = ValNodeNew (copy->featlist);
        if (added == NULL) return NULL;
        added->data.ptrvalue = CopyMatchingClauses (nested, segment_features);
        added->choice = DEFLINE_CLAUSEPLUS;
      }
    }

    /* the first node appended becomes the head of the new featlist */
    if (copy->featlist == NULL && added != NULL)
    {
      copy->featlist = added;
    }
  }

  copy->clause_info_only = any_on_segment ? FALSE : TRUE;
  return copy;
}
10599 
10600 //Segsets no longer supported
/* Appends to *new_list a copy of each entry of parent_features that matches.
 *
 * With match_features == NULL every entry matches.  Otherwise a
 * DEFLINE_FEATLIST entry matches when FeatureIsOnSegment says so, and a
 * DEFLINE_CLAUSEPLUS entry matches when FeatureClauseIsOnSegment says so.
 * Feature entries share the feature pointer; clause entries are duplicated
 * with CopyMatchingClauses.  Copying stops silently if a node cannot be
 * allocated.
 */
static void CopyFeatureList (
  ValNodePtr match_features,
  ValNodePtr parent_features,
  ValNodePtr PNTR new_list
)
{
  ValNodePtr src;
  ValNodePtr copy;
  Boolean    copy_everything;

  copy_everything = (Boolean) (match_features == NULL);

  src = parent_features;
  while (src != NULL)
  {
    copy = NULL;

    if (src->choice == DEFLINE_FEATLIST)
    {
      if (copy_everything
          || FeatureIsOnSegment ( src->data.ptrvalue, match_features))
      {
        copy = ValNodeNew (*new_list);
        if (copy == NULL) return;
        copy->data.ptrvalue = src->data.ptrvalue;
        copy->choice = DEFLINE_FEATLIST;
      }
    }
    else if (src->choice == DEFLINE_CLAUSEPLUS)
    {
      if (copy_everything
          || FeatureClauseIsOnSegment ( src->data.ptrvalue, match_features))
      {
        copy = ValNodeNew (*new_list);
        if (copy == NULL) return;
        copy->data.ptrvalue = CopyMatchingClauses ( src->data.ptrvalue,
                                                    match_features);
        copy->choice = DEFLINE_CLAUSEPLUS;
      }
    }

    /* first appended node becomes the head of the output list */
    if (copy != NULL && *new_list == NULL)
    {
      *new_list = copy;
    }

    src = src->next;
  }
}
10637 
10638 //Segsets no longer supported
/* Collects into *segment_clauses copies of the entries of parent_features
 * that have a feature listed in segment_features.  This simply forwards to
 * CopyFeatureList with segment_features as the match list.
 */
static void ExtractSegmentClauses (
  ValNodePtr segment_features,
  ValNodePtr parent_features,
  ValNodePtr PNTR segment_clauses
)
{
  CopyFeatureList (
    segment_features,   /* features that must be matched */
    parent_features,    /* clauses to pick from */
    segment_clauses     /* receives the copies */
  );
}
10647 //LCOV_EXCL_STOP
10648 
/* Settings bundle for building deflines on the segments of a segmented
 * sequence.
 * NOTE(review): no use of this struct is visible in this part of the file;
 * the field notes below are inferred from the field names and from the
 * like-named fields of SegmentDefLineFeatureClauseData -- confirm against
 * the code that fills it in.
 */
typedef struct segmentdeflinedata {
  BioseqPtr  parent_bsp;             /* presumably the parent (master) Bioseq */
  ValNodePtr parent_feature_list;    /* presumably clauses for the parent's features */
  Uint1      molecule_type;          /* presumably a MOLECULE_TYPE_* value */
  DeflineFeatureRequestList PNTR feature_requests; /* defline feature options */
  ModifierCombinationPtr m;          /* NOTE(review): source-modifier selection? confirm */
  ModifierItemLocalPtr modList;      /* NOTE(review): modifier list? confirm */
  OrganismDescriptionModifiersPtr odmp; /* NOTE(review): organism description options? confirm */
  Int2 product_flag;                 /* presumably the organelle product ending selector */
} SegmentDefLineData, PNTR SegmentDefLinePtr;
10659 
/* User data handed to GetFeatureClauseForSeg (through the segment context's
 * userdata pointer) while the feature clause text for each segment of a
 * segmented sequence is built.
 */
typedef struct segmentdeflinefeatureclausedata {
  BioseqPtr  parent_bsp;            /* Bioseq passed to NewFeatureClause and BuildFeatureClauses */
  ValNodePtr parent_feature_list;   /* clause list scanned for features that end on, or traverse, a segment */
  Uint1      molecule_type;         /* forwarded to BuildFeatureClauses */
  DeflineFeatureRequestList PNTR feature_requests; /* forwarded to NewFeatureClause and BuildFeatureClauses */
  Int2            product_flag;             /* forwarded to BuildFeatureClauses */
  Boolean         alternate_splice_flag;    /* forwarded to BuildFeatureClauses */
  Boolean         gene_cluster_opp_strand;  /* forwarded to BuildFeatureClauses */
  ValNodePtr PNTR list;             /* output: one node per segment, data is a DefLineFeatClausePtr */
} SegmentDefLineFeatureClauseData, PNTR SegmentDefLineFeatureClausePtr;
10670 
/* Feature clause text generated for one sequence.  Lists of these (as
 * ValNode data) are released with DefLineFeatClauseListFree, which frees
 * clauselist and the struct itself but not sep or bsp.
 */
typedef struct deflinefeatclause {
  SeqEntryPtr sep;         /* SeqEntry of the sequence the text belongs to */
  BioseqPtr   bsp;         /* the sequence itself */
  CharPtr     clauselist;  /* allocated clause text, as returned by BuildFeatureClauses */
} DefLineFeatClauseData, PNTR DefLineFeatClausePtr;
10676 
DefLineFeatClauseListFree(ValNodePtr vnp)10677 NLM_EXTERN void DefLineFeatClauseListFree (ValNodePtr vnp)
10678 {
10679   DefLineFeatClausePtr deflist;
10680 
10681   if (vnp == NULL) return;
10682   DefLineFeatClauseListFree (vnp->next);
10683   vnp->next = NULL;
10684   deflist = vnp->data.ptrvalue;
10685   if (deflist != NULL)
10686   {
10687     MemFree (deflist->clauselist);
10688     MemFree (deflist);
10689   }
10690   ValNodeFree (vnp);
10691 }
10692 
10693 
10694 //LCOV_EXCL_START
10695 //Segsets no longer supported
IntervalIntersectsIvals(Int2 numivals,Int4Ptr ivals,SeqMgrSegmentContextPtr context)10696 static Boolean IntervalIntersectsIvals
10697 (Int2    numivals,
10698  Int4Ptr ivals,
10699  SeqMgrSegmentContextPtr context)
10700 {
10701   Int2 idx;
10702   Int4 start, stop;
10703 
10704   if (numivals == 0 || ivals == NULL || context == NULL) return FALSE;
10705 
10706   for (idx = 0; idx < numivals; idx ++) {
10707     start = ivals [idx * 2];
10708     stop = ivals [idx * 2 + 1];
10709     if ( start <= context->cumOffset + context->to - context->from
10710          && stop >= context->cumOffset)
10711     {
10712       return TRUE;
10713     }
10714   }
10715   return FALSE;
10716 }
10717 
10718 
10719 //Segsets no longer supported
10720 /* if there are no features at all on this segment, select the genes that
10721  * traverse the segment.
10722  */
/* Builds a list of new clauses for the gene features in parent_feature_list
 * whose recorded intervals intersect the segment described by context.
 *
 * Only clauses whose first featlist entry is a gene (FEATDEF_GENE) and that
 * carry a non-empty interval array are considered.  Each match gets a fresh
 * clause from NewFeatureClause, wrapped in a DEFLINE_CLAUSEPLUS node.
 *
 * Returns the head of the new list, or NULL if nothing matched or an
 * allocation failed (what was built before the failure is not released).
 */
static ValNodePtr GrabTraversingGenes
(ValNodePtr              parent_feature_list,
 SeqMgrSegmentContextPtr context,
 BioseqPtr               parent_bsp,
 DeflineFeatureRequestListPtr rp)
{
  FeatureClausePtr  fcp, new_fcp;
  ValNodePtr        clause;
  SeqFeatPtr        sfp;
  ValNodePtr        segment_feature_list;
  ValNodePtr        vnp;

  segment_feature_list = NULL;
  for (clause = parent_feature_list;
       clause != NULL;
       clause = clause->next)
  {
    fcp = clause->data.ptrvalue;
    if (fcp == NULL || fcp->featlist == NULL) continue;

    sfp = fcp->featlist->data.ptrvalue;
    if (sfp == NULL || sfp->idx.subtype != FEATDEF_GENE) continue;

    if (fcp->ivals == NULL || fcp->numivals <= 0) continue;
    if (!IntervalIntersectsIvals (fcp->numivals, fcp->ivals, context)) continue;

    new_fcp = NewFeatureClause (sfp, parent_bsp, rp);
    /* this function returns a pointer, so failure is NULL (was FALSE) */
    if (new_fcp == NULL) return NULL;

    vnp = ValNodeNew (segment_feature_list);
    if (vnp == NULL) return NULL;
    vnp->data.ptrvalue = new_fcp;
    vnp->choice = DEFLINE_CLAUSEPLUS;
    if (segment_feature_list == NULL) segment_feature_list = vnp;
  }
  return segment_feature_list;
}
10761 //LCOV_EXCL_STOP
10762 
10763 
10764 static CharPtr BuildFeatureClauses (
10765   BioseqPtr   bsp,
10766   Uint1       molecule_type,
10767   SeqEntryPtr sep,
10768   ValNodePtr  PNTR feature_list,
10769   Boolean     isSegment,
10770   ValNodePtr  PNTR seg_feature_list,
10771   Int2        product_flag,
10772   Boolean     alternate_splice_flag,
10773   Boolean     gene_cluster_opp_strand,
10774   DeflineFeatureRequestList PNTR feature_requests
10775 );
10776 
10777 //LCOV_EXCL_START
10778 //Segsets no longer supported
GetFeatureClauseForSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr context)10779 static Boolean LIBCALLBACK GetFeatureClauseForSeg (
10780   SeqLocPtr slp,
10781   SeqMgrSegmentContextPtr context)
10782 {
10783   SegmentDefLineFeatureClausePtr sdlp;
10784   ValNodePtr        clause, tmp_parent_list;
10785   FeatureClausePtr  fcp, new_fcp;
10786   Int2              idx;
10787   Int4              start, stop;
10788   ValNodePtr        segment_feature_list, vnp;
10789   SeqIdPtr          sip;
10790   BioseqPtr         bsp;
10791   Uint2             entityID;
10792   SeqLocPtr         loc;
10793   DefLineFeatClausePtr deflist;
10794 
10795   if (slp == NULL || context == NULL) return FALSE;
10796   sdlp = (SegmentDefLineFeatureClausePtr) context->userdata;
10797 
10798   sip = SeqLocId (slp);
10799 
10800   if (sip == NULL) {
10801     loc = SeqLocFindNext (slp, NULL);
10802     if (loc != NULL) {
10803       sip = SeqLocId (loc);
10804     }
10805   }
10806   if (sip == NULL) return TRUE;
10807 
10808   bsp = BioseqFind (sip);
10809 
10810   if (bsp == NULL) return TRUE;
10811 
10812 
10813   segment_feature_list = NULL;
10814   for (clause = sdlp->parent_feature_list;
10815        clause != NULL;
10816        clause = clause->next)
10817   {
10818     fcp = clause->data.ptrvalue;
10819 
10820     if (fcp != NULL && fcp->ivals != NULL && fcp->numivals > 0)
10821     {
10822       idx = (fcp->numivals - 1) * 2;
10823       start = fcp->ivals [idx];
10824       stop = fcp->ivals [idx + 1];
10825       if ( stop <= context->cumOffset + context->to - context->from
10826            && stop >= context->cumOffset)
10827       {
10828         new_fcp = NewFeatureClause (fcp->featlist->data.ptrvalue,
10829                                     sdlp->parent_bsp,
10830                                     sdlp->feature_requests);
10831         if (new_fcp == NULL) return FALSE;
10832         vnp = ValNodeNew (segment_feature_list);
10833         if (vnp == NULL) return FALSE;
10834         vnp->data.ptrvalue = new_fcp;
10835         vnp->choice = DEFLINE_CLAUSEPLUS;
10836         if (segment_feature_list == NULL) segment_feature_list = vnp;
10837       }
10838     }
10839   }
10840 
10841   if (segment_feature_list == NULL) {
10842     segment_feature_list = GrabTraversingGenes (sdlp->parent_feature_list,
10843                                                 context, sdlp->parent_bsp,
10844                                                 sdlp->feature_requests);
10845   }
10846 
10847   entityID = ObjMgrGetEntityIDForPointer (bsp);
10848 
10849   tmp_parent_list = NULL;
10850   CopyFeatureList (NULL, sdlp->parent_feature_list, &tmp_parent_list);
10851 
10852   deflist = (DefLineFeatClausePtr) MemNew (sizeof (DefLineFeatClauseData));
10853   if (deflist == NULL) return TRUE;
10854   deflist->sep = SeqMgrGetSeqEntryForData (bsp);
10855   deflist->bsp = bsp;
10856   deflist->clauselist = BuildFeatureClauses (sdlp->parent_bsp,
10857                             sdlp->molecule_type,
10858                             SeqMgrGetSeqEntryForData (bsp),
10859                             &tmp_parent_list,
10860                             TRUE,
10861                             &segment_feature_list,
10862                             sdlp->product_flag,
10863                             sdlp->alternate_splice_flag,
10864                             sdlp->gene_cluster_opp_strand,
10865                             sdlp->feature_requests);
10866   vnp = ValNodeNew (*(sdlp->list));
10867   if (vnp == NULL) return TRUE;
10868   if (*(sdlp->list) == NULL) *(sdlp->list) = vnp;
10869   vnp->data.ptrvalue = deflist;
10870 
10871   FreeListElement (tmp_parent_list);
10872   FreeListElement (segment_feature_list);
10873   DeleteMarkedObjects (entityID, 0, NULL);
10874   return TRUE;
10875 }
10876 //LCOV_EXCL_STOP
10877 
HasAnyPromoters(BioseqPtr bsp)10878 static Boolean HasAnyPromoters (BioseqPtr bsp)
10879 {
10880   SeqFeatPtr sfp;
10881   SeqMgrFeatContext fcontext;
10882   Boolean rval = FALSE;
10883 
10884   for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_regulatory, &fcontext);
10885        sfp != NULL && !rval;
10886        sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_regulatory, &fcontext)) {
10887     rval = IsPromoter(sfp);
10888   }
10889   return rval;
10890 }
10891 
/* Adds a clause for a synthetic promoter spanning all of bsp to
 * *feature_list.
 *
 * A new import feature with subtype FEATDEF_promoter is created on the
 * sequence's SeqEntry, located on the plus strand from 0 to length - 1, and
 * marked for deletion (idx.deleteme), so the caller is expected to run
 * DeleteMarkedObjects later.  The clause gets a single interval covering the
 * whole sequence.
 *
 * Nothing is added when feature_list or bsp is NULL, or when the feature or
 * its clause cannot be created.
 */
static void AddFakePromoterClause (ValNodePtr PNTR feature_list, BioseqPtr bsp, DeflineFeatureRequestListPtr rp)
{
  FeatureClausePtr fcp;
  SeqFeatPtr       sfp = NULL;

  if (feature_list == NULL || bsp == NULL) return;

  /* create fake promoter */
  sfp = CreateNewFeature (SeqMgrGetSeqEntryForData (bsp), NULL,
                          SEQFEAT_IMP, NULL);
  if (sfp == NULL) return;

  sfp->location = SeqLocIntNew (0, bsp->length - 1, Seq_strand_plus, SeqIdDup (bsp->id));
  sfp->data.choice = SEQFEAT_IMP;
  sfp->idx.subtype = FEATDEF_promoter;
  /* mark promoter for deletion */
  sfp->idx.deleteme = TRUE;

  fcp = NewFeatureClause (sfp, bsp, rp);
  if (fcp == NULL) return;

  /* one interval covering the whole sequence; if the array cannot be
   * allocated the clause is still added, just without interval data
   */
  fcp->ivals = (Int4Ptr) MemNew (sizeof (Int4) * 2);
  if (fcp->ivals != NULL)
  {
    fcp->numivals = 1;
    fcp->ivals[0] = 0;
    fcp->ivals[1] = bsp->length - 1;
  }
  else
  {
    fcp->numivals = 0;
  }
  ValNodeAddPointer (feature_list, DEFLINE_CLAUSEPLUS, fcp);

}
10916 
IsInGenProdSet(BioseqPtr bsp)10917 static Boolean IsInGenProdSet (BioseqPtr bsp)
10918 {
10919   BioseqSetPtr bssp;
10920   if (bsp == NULL || bsp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET) {
10921     return FALSE;
10922   }
10923   bssp = (BioseqSetPtr) bsp->idx.parentptr;
10924   if (bssp->_class != BioseqseqSet_class_nuc_prot || bssp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET) {
10925     return FALSE;
10926   }
10927   bssp = bssp->idx.parentptr;
10928   if (bssp->_class == BioseqseqSet_class_gen_prod_set) {
10929     return TRUE;
10930   } else {
10931     return FALSE;
10932   }
10933 }
10934 
10935 
BuildNonFeatureListClause(BioseqPtr bsp,DefLineType feature_list_type)10936 NLM_EXTERN CharPtr BuildNonFeatureListClause (BioseqPtr bsp, DefLineType feature_list_type)
10937 {
10938   CharPtr      str = NULL;
10939   BioSourcePtr biop = NULL;
10940   SeqDescrPtr  sdp;
10941   SeqMgrDescContext context;
10942   CharPtr      organelle_name = NULL;
10943   Char         ending_str [200];
10944   CharPtr      mol_name = NULL;
10945   MolInfoPtr   molinfo;
10946 
10947   ending_str [0] = 0;
10948 
10949   biop = GetBiopForBsp (bsp);
10950   if (biop != NULL)
10951   {
10952       switch (biop->genome) {
10953       case GENOME_macronuclear :
10954           if (feature_list_type != DEFLINE_SEQUENCE) {
10955             organelle_name = "macronuclear";
10956           }
10957           break;
10958       case GENOME_nucleomorph :
10959           if (feature_list_type != DEFLINE_SEQUENCE) {
10960             organelle_name = "nucleomorph";
10961           }
10962           break;
10963       case GENOME_mitochondrion :
10964           organelle_name = "mitochondrion";
10965           break;
10966       case GENOME_apicoplast :
10967       case GENOME_chloroplast :
10968       case GENOME_kinetoplast :
10969       case GENOME_plastid :
10970       case GENOME_leucoplast :
10971           organelle_name = organelleByGenome [biop->genome];
10972           break;
10973       case GENOME_cyanelle :
10974       case GENOME_proplastid :
10975       case GENOME_hydrogenosome :
10976       case GENOME_chromatophore :
10977       case GENOME_chromoplast :
10978           if (feature_list_type != DEFLINE_SEQUENCE) {
10979             organelle_name = organelleByGenome [biop->genome];
10980           }
10981           break;
10982       }
10983   }
10984 
10985   if (organelle_name != NULL) {
10986     sprintf (ending_str, "%s", organelle_name);
10987   }
10988 
10989   if (feature_list_type == DEFLINE_COMPLETE_SEQUENCE)
10990   {
10991     StringCat (ending_str, ", complete sequence.");
10992   }
10993   else if (feature_list_type == DEFLINE_PARTIAL_SEQUENCE)
10994   {
10995     StringCat (ending_str, ", partial sequence.");
10996   }
10997   else if (feature_list_type == DEFLINE_COMPLETE_GENOME)
10998   {
10999     StringCat (ending_str, ", complete genome.");
11000   }
11001   else if (feature_list_type == DEFLINE_PARTIAL_GENOME)
11002   {
11003     StringCat (ending_str, ", partial genome.");
11004   }
11005   else if (feature_list_type == DEFLINE_SEQUENCE)
11006   {
11007     if (organelle_name == NULL) {
11008         sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
11009         if (sdp == NULL || (molinfo = sdp->data.ptrvalue) == NULL)
11010         {
11011           mol_name = NULL;
11012         }
11013         else
11014         {
11015           mol_name = BiomolNameFromBiomol (molinfo->biomol);
11016         }
11017     }
11018     if (mol_name == NULL)
11019     {
11020       sprintf (ending_str + StringLen (ending_str), " sequence");
11021     }
11022     else
11023     {
11024       sprintf (ending_str + StringLen (ending_str), " %s sequence", mol_name);
11025     }
11026   }
11027 
11028   str = StringSave (ending_str);
11029 
11030   return str;
11031 }
11032 
11033 
IsAmplifiedMiscFeat(ValNodePtr feature_list)11034 static Boolean IsAmplifiedMiscFeat (ValNodePtr feature_list)
11035 {
11036   FeatureClausePtr  fcp;
11037   Boolean           rval = FALSE;
11038   SeqFeatPtr        sfp;
11039   CharPtr           phrase1 = "amplified with primers designed for ";
11040   CharPtr           phrase2 = "amplified using primers designed for ";
11041 
11042   if (feature_list == NULL || feature_list->next != NULL) {
11043     rval = FALSE;
11044   } else if (feature_list->choice == DEFLINE_CLAUSEPLUS) {
11045     if ((fcp = (FeatureClausePtr) feature_list->data.ptrvalue) != NULL) {
11046       rval = IsAmplifiedMiscFeat (fcp->featlist);
11047     }
11048   } else if (feature_list->choice == DEFLINE_FEATLIST) {
11049     sfp = (SeqFeatPtr) feature_list->data.ptrvalue;
11050     if (sfp != NULL && sfp->idx.subtype == FEATDEF_misc_feature
11051         && (StringNICmp (sfp->comment, phrase1, StringLen (phrase1)) == 0
11052             || StringNICmp (sfp->comment, phrase2, StringLen (phrase2)) == 0)) {
11053       rval = TRUE;
11054     }
11055   }
11056   return rval;
11057 }
11058 
11059 
UseSpecialPhrase(BioseqPtr bsp,Uint1 molecule_type,ValNodePtr PNTR feature_list)11060 static CharPtr UseSpecialPhrase (BioseqPtr bsp, Uint1 molecule_type, ValNodePtr PNTR feature_list)
11061 {
11062   CharPtr phrase = NULL;
11063   if (bsp != NULL && bsp->mol == Seq_mol_dna && molecule_type == MOLECULE_TYPE_GENOMIC
11064       && feature_list != NULL && IsAmplifiedMiscFeat(*feature_list)) {
11065     phrase = StringSave ("genomic sequence");
11066   }
11067   return phrase;
11068 }
11069 
11070 
11071 /* NOTE: under some circumstances this function will create features that
11072  * are marked for deletion, so DeleteMarkedObjects should always be called
11073  * at some later point.
11074  */
BuildFeatureClauses(BioseqPtr bsp,Uint1 molecule_type,SeqEntryPtr sep,ValNodePtr PNTR feature_list,Boolean isSegment,ValNodePtr PNTR seg_feature_list,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand,DeflineFeatureRequestList PNTR feature_requests)11075 static CharPtr BuildFeatureClauses (
11076   BioseqPtr   bsp,
11077   Uint1       molecule_type,
11078   SeqEntryPtr sep,
11079   ValNodePtr  PNTR feature_list,
11080   Boolean     isSegment,
11081   ValNodePtr  PNTR seg_feature_list,
11082   Int2        product_flag,
11083   Boolean     alternate_splice_flag,
11084   Boolean     gene_cluster_opp_strand,
11085   DeflineFeatureRequestList PNTR feature_requests
11086 )
11087 {
11088   ValNodePtr   strings = NULL;
11089   ValNodePtr   clause;
11090   CharPtr      str = NULL;
11091   ValNodePtr   tmp_feat_list;
11092 
11093   if ((feature_requests->feature_list_type == DEFLINE_USE_FEATURES
11094        || (IsmRNASequence(bsp) && IsInGenProdSet(bsp)))
11095       && (! isSegment || (seg_feature_list != NULL && *seg_feature_list != NULL)))
11096   {
11097     /* special case for a feature that is not actually a feature */
11098     str = UseSpecialPhrase(bsp, molecule_type, feature_list);
11099     if (str != NULL) {
11100       return str;
11101     }
11102 
11103     /* remove features that indexer has chosen to suppress before they are grouped
11104      * with other features or used to determine loneliness etc.
11105      */
11106     RemoveSuppressedFeatures (feature_list, feature_requests->suppressed_feature_list);
11107 
11108     GroupmRNAs (feature_list, bsp, feature_requests);
11109 
11110     /* genes are added to other clauses */
11111     GroupGenes (feature_list, feature_requests->suppress_locus_tags);
11112 
11113     if (! feature_requests->suppress_alt_splice_phrase)
11114     {
11115       /* find alt-spliced CDSs */
11116       FindAltSplices (*feature_list, bsp, feature_requests);
11117     }
11118 
11119     GroupAltSplicedExons (feature_list, bsp, TRUE);
11120 
11121     if (!isSegment)
11122     {
11123        /* group CDSs that have the same name and are under the same gene together */
11124       GroupSegmentedCDSs (feature_list, bsp, TRUE, feature_requests);
11125     }
11126 
11127     /* per Susan's request, if promoters are requested and no promoters are found, add a promoter */
11128     if (feature_requests->keep_items[RemovablePromoter]
11129         && feature_requests->add_fake_promoters
11130         && !HasAnyPromoters (bsp)) {
11131       AddFakePromoterClause (feature_list, bsp, feature_requests);
11132     }
11133 
11134     /* now group clauses */
11135     GroupAllClauses ( feature_list, gene_cluster_opp_strand, bsp );
11136 
11137     ExpandAltSplicedExons (*feature_list, bsp, feature_requests);
11138 
11139     FindGeneProducts (*feature_list, bsp, feature_requests);
11140 
11141     if (seg_feature_list != NULL && *seg_feature_list != NULL)
11142     {
11143       tmp_feat_list = NULL;
11144       ExtractSegmentClauses ( *seg_feature_list, *feature_list, &tmp_feat_list);
11145       FreeListElement (*feature_list);
11146       DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
11147       *feature_list = tmp_feat_list;
11148     }
11149 
11150     /* remove exons and other unwanted features */
11151     RemoveUnwantedFeatures (feature_list, bsp, isSegment, feature_requests);
11152 
11153     RemoveGenesMentionedElsewhere (feature_list, *feature_list, TRUE,
11154                                    feature_requests->suppress_locus_tags);
11155 
11156     if (feature_requests->remove_subfeatures)
11157     {
11158       DeleteSubfeatures (feature_list, TRUE);
11159     }
11160 
11161     DeleteOperonAndGeneClusterSubfeatures (feature_list, TRUE);
11162 
11163     CountUnknownGenes (feature_list, bsp, feature_requests);
11164 
11165     if (feature_requests->misc_feat_parse_rule == 1)
11166     {
11167       RenameMiscFeats (*feature_list, molecule_type);
11168     }
11169     else
11170     {
11171       RemoveUnwantedMiscFeats (feature_list, TRUE);
11172     }
11173 
11174     /* take any exons on the minus strand */
11175     /* and reverse their order within the clause */
11176     ReverseClauses (feature_list, IsExonOrIntron);
11177 
11178     RenameExonSequences ( feature_list, bsp, TRUE);
11179 
11180     LabelClauses (*feature_list, molecule_type, bsp,
11181                   feature_requests);
11182 
11183     /* parse lists of tRNA and intergenic spacer clauses in misc_feat notes */
11184     /* need to do this after LabelClauses, since LabelClauses labels intergenic
11185      * spacers with more relaxed restrictions.  The labels from LabelClauses
11186      * for intergenic spacers are the default values.
11187      */
11188     ReplaceIntergenicSpacerClauses (feature_list, bsp, feature_requests);
11189 
11190     ConsolidateClauses (feature_list, bsp, molecule_type, TRUE,
11191                         feature_requests);
11192 
11193     /* this allows genes to be listed together even if they are from */
11194     /* separate sequences */
11195 /*    SmashTallClauses (feature_list, TRUE); */
11196 
11197     clause = *feature_list;
11198     ListClauses (clause, &strings, TRUE, FALSE, feature_requests->suppress_allele);
11199 
11200     AutoDef_AddEnding (clause, &strings, bsp,
11201                        product_flag, alternate_splice_flag);
11202     str = MergeValNodeStrings (strings, FALSE);
11203       ValNodeFreeData (strings);
11204   } else {
11205     str = BuildNonFeatureListClause(bsp, feature_requests->feature_list_type);
11206   }
11207 
11208   return str;
11209 }
11210 
11211 /* This function looks at the product names for the CDSs on the Bioseq,
11212  * and sets the flag for the "nuclear genes for X products" ending
11213  * based on the contents of the CDS products. */
GetProductFlagFromCDSProductNames(BioseqPtr bsp)11214 static Int2 GetProductFlagFromCDSProductNames (BioseqPtr bsp)
11215 {
11216   SeqMgrFeatContext context;
11217   SeqFeatPtr        cds = NULL;
11218   Int2              product_flag;
11219   Int2              i;
11220   CharPtr           found;
11221   Char              ch_before, ch_after;
11222 
11223   product_flag = 0;
11224   for (cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &context);
11225        cds != NULL && product_flag == 0;
11226        cds = cds->next)
11227   {
11228     for (i = 1; organelleByPopup[i] != NULL && product_flag == 0; i++)
11229     {
11230       found = StringStr (context.label, organelleByPopup[i]);
11231       if (found != NULL)
11232       {
11233         if (found == context.label) {
11234           ch_before = ' ';
11235         } else {
11236           ch_before = *(found - 1);
11237         }
11238         ch_after = *(found + StringLen (organelleByPopup[i]));
11239         if (ch_before == ' ' && (ch_after == 0 || ch_after == ' '))
11240         {
11241           product_flag = i;
11242         }
11243       }
11244     }
11245   }
11246 
11247   return product_flag;
11248 }
11249 
11250 
11251 //LCOV_EXCL_START
11252 //Segsets no longer supported
/* Builds the definition-line clause entry for a segmented Bioseq and appends
 * it to *list.
 *
 * bsp                      the segmented Bioseq
 * entityID                 passed to GetMoleculeType
 * feature_requests         feature options, passed to GetFeatureList and
 *                          BuildFeatureClauses
 * product_flag             -1 or DEFAULT_ORGANELLE_CLAUSE means "derive it
 *                          with GetProductFlagFromCDSProductNames"
 * alternate_splice_flag,
 * gene_cluster_opp_strand  stored in the segment callback data and forwarded
 *                          to BuildFeatureClauses
 * list                     receives the new DefLineFeatClauseData node; it is
 *                          also handed to the GetFeatureClauseForSeg callback
 *
 * The parent feature list is released on every path.  If the list node
 * cannot be allocated, the clause entry built so far is released as well
 * instead of being leaked.
 */
static void BuildFeatClauseListForSegSet (
  BioseqPtr bsp,
  Uint2 entityID,
  DeflineFeatureRequestList PNTR feature_requests,
  Int2 product_flag,
  Boolean alternate_splice_flag,
  Boolean gene_cluster_opp_strand,
  ValNodePtr PNTR list
)
{
  SegmentDefLineFeatureClauseData sdld;
  DefLineFeatClausePtr deflist;
  ValNodePtr    vnp;

  /* get default product flag if necessary */
  if (product_flag == -1 || product_flag == DEFAULT_ORGANELLE_CLAUSE) {
    product_flag = GetProductFlagFromCDSProductNames (bsp);
  }

  sdld.parent_bsp = bsp;
  sdld.molecule_type = GetMoleculeType (bsp, entityID);
  sdld.parent_feature_list = GetFeatureList (bsp,
                                             feature_requests);

  sdld.feature_requests =  feature_requests;
  sdld.product_flag = product_flag;
  sdld.alternate_splice_flag = alternate_splice_flag;
  sdld.gene_cluster_opp_strand = gene_cluster_opp_strand;
  sdld.list = list;
  SeqMgrExploreSegments (bsp, (Pointer) &sdld, GetFeatureClauseForSeg);

  deflist = (DefLineFeatClausePtr) MemNew (sizeof (DefLineFeatClauseData));
  if (deflist == NULL) {
    /* do not leak the feature list gathered above */
    FreeListElement (sdld.parent_feature_list);
    return;
  }
  deflist->sep = SeqMgrGetSeqEntryForData (bsp);
  deflist->bsp = bsp;

  deflist->clauselist = BuildFeatureClauses (bsp,
                        sdld.molecule_type,
                        SeqMgrGetSeqEntryForData (bsp),
                        &sdld.parent_feature_list,
                        FALSE,
                        NULL,
                        product_flag,
                        alternate_splice_flag,
                        gene_cluster_opp_strand,
                        feature_requests);

  vnp = ValNodeNew (*list);
  if (vnp == NULL) {
    /* nothing will own the entry, so release it and its clause string */
    deflist->clauselist = MemFree (deflist->clauselist);
    deflist = MemFree (deflist);
  } else {
    if (*list == NULL) *list = vnp;
    vnp->data.ptrvalue = deflist;
  }
  FreeListElement (sdld.parent_feature_list);
}
11304 //LCOV_EXCL_STOP
11305 
11306 
Is5SList(ValNodePtr feature_list)11307 static Boolean Is5SList (ValNodePtr feature_list)
11308 {
11309   FeatureClausePtr  fcp;
11310   Boolean           is_5s_list = TRUE;
11311   SeqFeatPtr        sfp;
11312   RnaRefPtr         rrp;
11313   Boolean           is_single = FALSE, found_single = FALSE;
11314 
11315   if (feature_list == NULL) {
11316     return FALSE;
11317   }
11318   if (feature_list->next == NULL) {
11319     is_single = TRUE;
11320   }
11321 
11322   while (feature_list != NULL && is_5s_list) {
11323     if (feature_list->choice != DEFLINE_CLAUSEPLUS) {
11324       is_5s_list = FALSE;
11325     } else if ((fcp = (FeatureClausePtr) feature_list->data.ptrvalue) == NULL) {
11326       is_5s_list = FALSE;
11327     } else if (fcp->featlist->choice != DEFLINE_FEATLIST
11328                || (sfp = (SeqFeatPtr) fcp->featlist->data.ptrvalue) == NULL) {
11329       is_5s_list = FALSE;
11330     } else if (sfp->idx.subtype == FEATDEF_rRNA) {
11331       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
11332       if (rrp == NULL) {
11333         is_5s_list = FALSE;
11334       } else if (rrp->ext.choice != 1) {
11335         is_5s_list = FALSE;
11336       } else if (StringCmp (rrp->ext.value.ptrvalue, "5S ribosomal RNA") != 0) {
11337         is_5s_list = FALSE;
11338       }
11339     } else if (sfp->idx.subtype == FEATDEF_misc_feature) {
11340       if (StringCmp (sfp->comment, "nontranscribed spacer") != 0
11341           && StringCmp (sfp->comment, "contains 5S ribosomal RNA and nontranscribed spacer") != 0) {
11342         is_5s_list = FALSE;
11343       } else if (StringCmp (sfp->comment, "contains 5S ribosomal RNA and nontranscribed spacer") == 0) {
11344         found_single = TRUE;
11345       }
11346     } else {
11347       is_5s_list = FALSE;
11348     }
11349     feature_list = feature_list->next;
11350   }
11351   if (is_single && !found_single) {
11352     is_5s_list = FALSE;
11353   }
11354   return is_5s_list;
11355 }
11356 
11357 
/* Builds the definition-line clause entry for the nucleotide Bioseq found in
 * sep and appends it to *list.
 *
 * sep                      entry to process; when FindNucSeqEntry returns a
 *                          different entry, that one is used instead
 * entityID                 passed to GetMoleculeType
 * feature_requests         feature options, passed to GetFeatureList and
 *                          BuildFeatureClauses
 * product_flag             -1 or DEFAULT_ORGANELLE_CLAUSE means "derive it
 *                          with GetProductFlagFromCDSProductNames"
 * alternate_splice_flag,
 * gene_cluster_opp_strand  forwarded to BuildFeatureClauses
 * list                     receives the new DefLineFeatClauseData node
 *
 * Nothing is added when SpecialHandlingForSpecialTechniques returns TRUE for
 * the Bioseq, when the entry is not a Bioseq, or when an allocation fails.
 * The feature list is released on every path that created it, and a clause
 * entry that cannot be attached to the list is released rather than leaked.
 */
static void BuildOneFeatClauseList (
  SeqEntryPtr sep,
  Uint2 entityID,
  DeflineFeatureRequestList PNTR feature_requests,
  Int2 product_flag,
  Boolean alternate_splice_flag,
  Boolean gene_cluster_opp_strand,
  ValNodePtr PNTR list
)
{
  BioseqPtr    bsp;
  ValNodePtr    head;
  Uint1      molecule_type;
  SeqEntryPtr   nsep;
  DefLineFeatClausePtr deflist;
  ValNodePtr    vnp;

  nsep = FindNucSeqEntry (sep);
  if (nsep != NULL)
  {
    bsp = (BioseqPtr) nsep->data.ptrvalue;
    if ( SpecialHandlingForSpecialTechniques (bsp))
    {
      return;
    }
    if (bsp != NULL && bsp->repr == Seq_repr_seg &&
      bsp->seq_ext != NULL && bsp->seq_ext_type == 1)
    {
      //LCOV_EXCL_START
      //Segsets no longer supported
      /* NOTE(review): there is no return after this call, so the code below
       * also runs for the same Bioseq -- confirm that is intended. */
      BuildFeatClauseListForSegSet (bsp, entityID, feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, list);
      //LCOV_EXCL_STOP
    }
  }

  if (nsep != NULL && nsep != sep)
    sep = nsep;


  if (! IS_Bioseq (sep)) return;

  /* get list of all recognized features */
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  if ( SpecialHandlingForSpecialTechniques (bsp))
  {
    return;
  }
  molecule_type = GetMoleculeType (bsp, entityID);
  head = GetFeatureList (bsp, feature_requests);

  /* get default product flag if necessary */
  if (product_flag == -1 || product_flag == DEFAULT_ORGANELLE_CLAUSE) {
    product_flag = GetProductFlagFromCDSProductNames (bsp);
  }

  deflist = (DefLineFeatClausePtr) MemNew (sizeof (DefLineFeatClauseData));
  if (deflist == NULL) {
    /* do not leak the feature list gathered above */
    FreeListElement (head);
    return;
  }
  deflist->sep = SeqMgrGetSeqEntryForData (bsp);
  deflist->bsp = bsp;
  if (Is5SList(head)) {
    /* fixed wording for lists made only of 5S rRNA and spacers */
    deflist->clauselist = StringSave ("5S ribosomal RNA gene region.");
  } else {
    deflist->clauselist = BuildFeatureClauses (bsp,
                                               molecule_type,
                                               SeqMgrGetSeqEntryForData (bsp),
                                               &head,
                                               FALSE,
                                               NULL,
                                               product_flag,
                                               alternate_splice_flag,
                                               gene_cluster_opp_strand,
                                               feature_requests);
  }

  vnp = ValNodeNew (*list);
  if (vnp == NULL) {
    /* nothing will own the entry, so release it and its clause string */
    deflist->clauselist = MemFree (deflist->clauselist);
    deflist = MemFree (deflist);
  } else {
    if (*list == NULL) *list = vnp;
    vnp->data.ptrvalue = deflist;
  }
  FreeListElement (head);
}
11439 
11440 
/* Walks a SeqEntry tree and calls BuildOneFeatClauseList for the entries
 * that should get their own clause list.
 *
 * A Bioseq-set whose class is 7, satisfies IsPopPhyEtcSet, or is
 * BioseqseqSet_class_gen_prod_set / BioseqseqSet_class_not_set is descended
 * into member by member; any other entry is handed to BuildOneFeatClauseList
 * directly.  All remaining arguments are passed through unchanged.
 */
static void RecurseForBuildingFeatClauseLists(
  SeqEntryPtr sep,
  Uint2 entityID,
  DeflineFeatureRequestList PNTR feature_requests,
  Int2 product_flag,
  Boolean alternate_splice_flag,
  Boolean gene_cluster_opp_strand,
  ValNodePtr PNTR list
)
{
  BioseqSetPtr    bssp;
  SeqEntryPtr     member;

  //LCOV_EXCL_START
  //when regenerating, always calling at bioseq level
  if ( IS_Bioseq_set (sep))
  {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL)
    {
      return;
    }
    if ( bssp->_class == 7 || IsPopPhyEtcSet (bssp->_class)
        || bssp->_class == BioseqseqSet_class_gen_prod_set
        || bssp->_class == BioseqseqSet_class_not_set)
    {
      /* container set: recurse into each member instead of the set itself */
      member = bssp->seq_set;
      while (member != NULL)
      {
        RecurseForBuildingFeatClauseLists (member, entityID, feature_requests,
                                           product_flag, alternate_splice_flag,
                                           gene_cluster_opp_strand, list);
        member = member->next;
      }
      return;
    }
  }
  //LCOV_EXCL_STOP

  BuildOneFeatClauseList (sep, entityID, feature_requests, product_flag,
                          alternate_splice_flag, gene_cluster_opp_strand,
                          list);
}
11475 
11476 
11477 
BuildDefLineFeatClauseList(SeqEntryPtr sep,Uint2 entityID,DeflineFeatureRequestList PNTR feature_requests,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand,ValNodePtr PNTR list)11478 NLM_EXTERN void BuildDefLineFeatClauseList (
11479   SeqEntryPtr sep,
11480   Uint2 entityID,
11481   DeflineFeatureRequestList PNTR feature_requests,
11482   Int2 product_flag,
11483   Boolean alternate_splice_flag,
11484   Boolean gene_cluster_opp_strand,
11485   ValNodePtr PNTR list
11486 )
11487 {
11488   RecurseForBuildingFeatClauseLists (sep, entityID, feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, list);
11489   DeleteMarkedObjects (entityID, 0, NULL);
11490 }
11491 
11492 //LCOV_EXCL_START
11493 //Not part of Autodef or Cleanup
IdenticalExceptForPartialComplete(CharPtr str1,CharPtr str2)11494 static Boolean IdenticalExceptForPartialComplete (CharPtr str1, CharPtr str2)
11495 {
11496     CharPtr cp, word_in_first, word_in_second;
11497     Int4    first_len, second_len, compare_len;
11498 
11499     if (StringHasNoText (str1) && StringHasNoText (str2)) {
11500         return TRUE;
11501     } else if (StringHasNoText (str1) || StringHasNoText (str2)) {
11502         return FALSE;
11503     }
11504 
11505     word_in_first = StringISearch (str1, "partial");
11506     cp = StringISearch (str1, "complete");
11507     if (word_in_first == NULL || (cp != NULL && word_in_first > cp)) {
11508         word_in_first = cp;
11509         first_len = 8;
11510     } else {
11511         first_len = 7;
11512     }
11513 
11514     word_in_second = StringISearch (str2, "partial");
11515     cp = StringISearch (str2, "complete");
11516     if (word_in_second == NULL || (cp != NULL && word_in_second > cp)) {
11517         word_in_second = cp;
11518         second_len = 8;
11519     } else {
11520         second_len = 7;
11521     }
11522 
11523     if (word_in_first == NULL && word_in_second == NULL) {
11524         if (StringCmp (str1, str2) == 0) {
11525             return TRUE;
11526         } else {
11527             return FALSE;
11528         }
11529     } else if (word_in_first == NULL || word_in_second == NULL) {
11530         return FALSE;
11531     } else if ((compare_len = word_in_first - str1) != word_in_second - str2) {
11532         return FALSE;
11533     } else if (StringNCmp (str1, str2, compare_len) != 0) {
11534         return FALSE;
11535     } else {
11536         return IdenticalExceptForPartialComplete (word_in_first + first_len, word_in_second + second_len);
11537     }
11538 }
11539 
11540 
11541 //Not part of Autodef or Cleanup
GetTaxnameForBsp(BioseqPtr bsp)11542 static CharPtr GetTaxnameForBsp (BioseqPtr bsp)
11543 {
11544   SeqDescrPtr       sdp;
11545   SeqMgrDescContext context;
11546   BioSourcePtr      biop;
11547   CharPtr           taxname = NULL;
11548 
11549   if (bsp != NULL) {
11550     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
11551     if (sdp != NULL && (biop = sdp->data.ptrvalue) != NULL
11552       && biop->org != NULL) {
11553       taxname = biop->org->taxname;
11554       }
11555   }
11556   return taxname;
11557 }
11558 
11559 //Not part of Autodef or Cleanup (no more popset retros)
AreFeatureClausesUnique(ValNodePtr list)11560 NLM_EXTERN Boolean AreFeatureClausesUnique (ValNodePtr list)
11561 {
11562   ValNodePtr vnp1, vnp2;
11563   DefLineFeatClausePtr deflist1, deflist2;
11564   CharPtr              taxname1;
11565 
11566   for (vnp1 = list; vnp1 != NULL && vnp1->next != NULL; vnp1 = vnp1->next)
11567   {
11568     deflist1 = vnp1->data.ptrvalue;
11569     if (deflist1 == NULL || deflist1->clauselist == NULL) return FALSE;
11570     taxname1 = GetTaxnameForBsp (deflist1->bsp);
11571 
11572     for (vnp2 = vnp1->next; vnp2 != NULL; vnp2 = vnp2->next)
11573     {
11574       deflist2 = vnp2->data.ptrvalue;
11575       if (deflist2 == NULL || deflist2->clauselist == NULL
11576         || (StringCmp (taxname1, GetTaxnameForBsp (deflist2->bsp)) == 0
11577             && IdenticalExceptForPartialComplete (deflist1->clauselist, deflist2->clauselist)))
11578       {
11579         return FALSE;
11580       }
11581     }
11582   }
11583   return TRUE;
11584 }
11585 //LCOV_EXCL_STOP
11586 
11587 
GetKeywordPrefix(SeqEntryPtr sep)11588 NLM_EXTERN CharPtr GetKeywordPrefix (SeqEntryPtr sep)
11589 {
11590   ValNodePtr vnp;
11591   GBBlockPtr gbp;
11592   MolInfoPtr mip;
11593 
11594   vnp = SeqEntryGetSeqDescr (sep, Seq_descr_genbank, NULL);
11595   if (vnp == NULL) {
11596     vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
11597     if (vnp != NULL) {
11598       mip = (MolInfoPtr) vnp->data.ptrvalue;
11599       if (mip != NULL && mip->tech == MI_TECH_tsa) {
11600         return "TSA: ";
11601       }
11602     }
11603     } else {
11604       gbp = (GBBlockPtr) vnp->data.ptrvalue;
11605       if (gbp != NULL)
11606       {
11607         for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next)
11608         {
11609           if (StringICmp((CharPtr)vnp->data.ptrvalue, "TPA:inferential") == 0)
11610           {
11611             return "TPA_inf: ";
11612           }
11613           else if (StringICmp((CharPtr)vnp->data.ptrvalue, "TPA:experimental") == 0)
11614           {
11615             return "TPA_exp: ";
11616           }
11617         }
11618       }
11619     }
11620     return "";
11621 }
11622 
11623 
11624 NLM_EXTERN CharPtr
BuildOneDefinitionLine(SeqEntryPtr sep,BioseqPtr bsp,CharPtr featclause_list,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,OrganismDescriptionModifiersPtr odmp)11625 BuildOneDefinitionLine
11626 (SeqEntryPtr sep,
11627  BioseqPtr   bsp,
11628  CharPtr  featclause_list,
11629   ModifierItemLocalPtr modList,
11630   ValNodePtr modifier_indices,
11631   OrganismDescriptionModifiersPtr odmp)
11632 {
11633   CharPtr    org_desc, tmp_str, keyword_prefix;
11634 
11635   keyword_prefix = GetKeywordPrefix (sep);
11636 
11637   org_desc = GetOrganismDescription (bsp, modList, modifier_indices, odmp);
11638   tmp_str = (CharPtr) MemNew (StringLen (keyword_prefix)
11639                               + StringLen (org_desc)
11640                               + StringLen (featclause_list) + 2);
11641   if (tmp_str == NULL) return NULL;
11642   tmp_str [0] = 0;
11643   if (keyword_prefix != NULL)
11644   {
11645     StringCat (tmp_str, keyword_prefix);
11646   }
11647   StringCat (tmp_str, org_desc);
11648   MemFree (org_desc);
11649   if (featclause_list != NULL
11650     && featclause_list [0] != ','
11651     && featclause_list [0] != '.'
11652     && featclause_list [0] != 0)
11653   {
11654     StringCat (tmp_str, " ");
11655   }
11656   StringCat (tmp_str, featclause_list);
11657   tmp_str [0] = toupper (tmp_str [0]);
11658 
11659   return tmp_str;
11660 }
11661 
11662 
11663 typedef struct deflineclauseoptions {
11664   DeflineFeatureRequestList feature_requests;
11665   Int2 product_flag;
11666   Boolean alternate_splice_flag;
11667   Boolean gene_cluster_opp_strand;
11668 } DefLineClauseOptions, PNTR DefLineClauseOptionsPtr;
11669 
11670 //LCOV_EXCL_START
11671 //Not part of Autodef or Cleanup (no more popset retros)
DefLineClauseOptionsNew(void)11672 static DefLineClauseOptionsPtr DefLineClauseOptionsNew (void)
11673 {
11674   DefLineClauseOptionsPtr clause_options;
11675 
11676   clause_options = MemNew (sizeof (DefLineClauseOptions));
11677   InitFeatureRequests (&(clause_options->feature_requests));
11678   clause_options->product_flag = DEFAULT_ORGANELLE_CLAUSE;
11679   clause_options->alternate_splice_flag = FALSE;
11680   clause_options->gene_cluster_opp_strand = FALSE;
11681 
11682   return clause_options;
11683 }
11684 
11685 
/* Releases clause_options with MemFree and returns MemFree's result, so the
 * caller can assign it back to the pointer.  A NULL argument is returned
 * unchanged without calling MemFree.
 */
static DefLineClauseOptionsPtr DefLineClauseOptionsFree (DefLineClauseOptionsPtr clause_options)
{
  if (clause_options == NULL) {
    return NULL;
  }
  return (DefLineClauseOptionsPtr) MemFree (clause_options);
}
11693 
11694 
11695 typedef void (*Nlm_SetFeatureRequestsProc) PROTO ((DefLineClauseOptionsPtr));
11696 
11697 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: resets the feature requests with InitFeatureRequests
 * and restores product_flag to DEFAULT_ORGANELLE_CLAUSE and both Boolean
 * flags to FALSE.
 */
static void DefaultClauseOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);

  clause_options->gene_cluster_opp_strand = FALSE;
  clause_options->alternate_splice_flag = FALSE;
  clause_options->product_flag = DEFAULT_ORGANELLE_CLAUSE;
}
11705 
11706 
11707 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: default options, except that the feature list type
 * is DEFLINE_COMPLETE_SEQUENCE.
 */
static void CompleteSequenceClauseOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->feature_list_type = DEFLINE_COMPLETE_SEQUENCE;

  clause_options->gene_cluster_opp_strand = FALSE;
  clause_options->alternate_splice_flag = FALSE;
  clause_options->product_flag = DEFAULT_ORGANELLE_CLAUSE;
}
11716 
11717 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: default options, except that the feature list type
 * is DEFLINE_COMPLETE_GENOME.
 */
static void CompleteGenomeClauseOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->feature_list_type = DEFLINE_COMPLETE_GENOME;

  clause_options->gene_cluster_opp_strand = FALSE;
  clause_options->alternate_splice_flag = FALSE;
  clause_options->product_flag = DEFAULT_ORGANELLE_CLAUSE;
}
11726 
11727 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: default options, except that the feature list type
 * is DEFLINE_SEQUENCE.
 */
static void SequenceClauseOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->feature_list_type = DEFLINE_SEQUENCE;

  clause_options->gene_cluster_opp_strand = FALSE;
  clause_options->alternate_splice_flag = FALSE;
  clause_options->product_flag = DEFAULT_ORGANELLE_CLAUSE;
}
11736 
11737 
11738 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: resets the feature requests and asks to keep
 * RemovableNoncodingProductFeat items.  product_flag and the two Boolean
 * flags are left as they were.
 */
static void MiscFeatNonCodingOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->keep_items[RemovableNoncodingProductFeat] = TRUE;
}
11745 
11746 
11747 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: resets the feature requests, asks to keep
 * RemovableNoncodingProductFeat items and sets misc_feat_parse_rule to 1
 * (presumably the semicolon parse rule, going by the function name --
 * confirm against the rule definitions).  product_flag and the two Boolean
 * flags are left as they were.
 */
static void MiscFeatSemicolonOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->misc_feat_parse_rule = 1;
  requests->keep_items[RemovableNoncodingProductFeat] = TRUE;
}
11756 
11757 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: default options, except that product_flag is forced
 * to 2 (presumably the mitochondrial entry of the organelle table, going by
 * the function name -- confirm against that table).
 */
static void MitochondrialProductClauseOptions (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);

  clause_options->gene_cluster_opp_strand = FALSE;
  clause_options->alternate_splice_flag = FALSE;
  clause_options->product_flag = 2;
}
11765 
11766 
11767 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: resets the feature requests and asks to keep
 * RemovableExon and RemovablePromoter items.  product_flag and the two
 * Boolean flags are left as they were.
 */
static void RequestPromoterAndExon (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->keep_items[RemovablePromoter] = TRUE;
  requests->keep_items[RemovableExon] = TRUE;
}
11774 
11775 
11776 //Not part of Autodef or Cleanup (no more popset retros)
/* Clause-option setter: resets the feature requests and asks to keep
 * RemovableExon and RemovableIntron items.  product_flag and the two
 * Boolean flags are left as they were.
 */
static void RequestIntronAndExon (DefLineClauseOptionsPtr clause_options)
{
  DeflineFeatureRequestList PNTR requests = &(clause_options->feature_requests);

  InitFeatureRequests (requests);
  requests->keep_items[RemovableIntron] = TRUE;
  requests->keep_items[RemovableExon] = TRUE;
}
11783 
11784 //Not part of Autodef or Cleanup (no more popset retros)
11785 static Nlm_SetFeatureRequestsProc ClauseOptionSetList[] = {
11786   DefaultClauseOptions,
11787   RequestPromoterAndExon,
11788   RequestIntronAndExon,
11789   CompleteSequenceClauseOptions,
11790   CompleteGenomeClauseOptions,
11791   SequenceClauseOptions,
11792   MiscFeatNonCodingOptions,
11793   MiscFeatSemicolonOptions,
11794   MitochondrialProductClauseOptions,
11795   NULL };
11796 
11797 typedef Boolean (*Nlm_SetOrgModifiersProc) PROTO ((OrganismDescriptionModifiersPtr, ValNodePtr PNTR, ModifierItemLocalPtr));
11798 
11799 
11800 //Not part of Autodef or Cleanup (no more popset retros)
/* Switches on both use_modifiers and use_labels in odmp.  mod_list is
 * accepted for symmetry with the adjusters that call this but is not used.
 */
static void DefaultOrgOptions (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list)
{
  (void) mod_list;

  odmp->use_labels = TRUE;
  odmp->use_modifiers = TRUE;
}
11806 
11807 
11808 //Not part of Autodef or Cleanup (no more popset retros)
SubstituteMod(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available,Int4 mod1,Int4 mod2)11809 static Boolean SubstituteMod (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 mod1, Int4 mod2)
11810 {
11811   ValNodePtr vnp;
11812   Boolean has_one = FALSE;
11813   Boolean has_two = FALSE;
11814   Boolean rval = FALSE;
11815 
11816   if (mod_list == NULL || *mod_list == NULL || available == NULL || !available[mod2].any_present) {
11817     return FALSE;
11818   }
11819   DefaultOrgOptions(odmp, mod_list);
11820 
11821   for (vnp = *mod_list; vnp != NULL; vnp = vnp->next) {
11822     if (vnp->data.intvalue == mod1) {
11823       has_one = TRUE;
11824     } else if (vnp->data.intvalue == mod2) {
11825       has_two = TRUE;
11826     }
11827   }
11828 
11829   if (has_one && !has_two) {
11830     for (vnp = *mod_list; vnp != NULL; vnp = vnp->next) {
11831       if (vnp->data.intvalue == mod1) {
11832         vnp->data.intvalue = mod2;
11833         rval = TRUE;
11834       }
11835     }
11836   }
11837   return rval;
11838 }
11839 
11840 
11841 //Not part of Autodef or Cleanup (no more popset retros)
IsolateInsteadOfClone(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11842 static Boolean IsolateInsteadOfClone (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11843 {
11844   return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Clone, DEFLINE_POS_Isolate);
11845 }
11846 
11847 
11848 //Not part of Autodef or Cleanup (no more popset retros)
HaplotypeInsteadOfVoucher(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11849 static Boolean  HaplotypeInsteadOfVoucher (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11850 {
11851   return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Specimen_voucher, DEFLINE_POS_Haplotype);
11852 }
11853 
11854 //Not part of Autodef or Cleanup (no more popset retros)
VoucherInsteadOfIsolate(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11855 static Boolean  VoucherInsteadOfIsolate (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11856 {
11857   return SubstituteMod (odmp, mod_list, available, DEFLINE_POS_Isolate, DEFLINE_POS_Specimen_voucher);
11858 }
11859 
11860 
11861 //Not part of Autodef or Cleanup (no more popset retros)
UseNone(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11862 static Boolean UseNone (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11863 {
11864   if (mod_list == NULL || *mod_list == NULL) {
11865     return FALSE;
11866   }
11867 
11868   DefaultOrgOptions(odmp, mod_list);
11869   odmp->use_modifiers = FALSE;
11870   return TRUE;
11871 }
11872 
11873 
11874 //Not part of Autodef or Cleanup (no more popset retros)
AddOneAdjustmentMod(ValNodePtr PNTR mod_list,ModifierItemLocalPtr available,Int4 specific)11875 static Boolean AddOneAdjustmentMod (ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 specific)
11876 {
11877   Boolean already_has_sv = FALSE;
11878   Boolean rval = FALSE;
11879   ValNodePtr vnp, vnp_new;
11880 
11881   if (mod_list == NULL || available == NULL || !available[specific].any_present) {
11882     return FALSE;
11883   }
11884 
11885   for (vnp = *mod_list; vnp != NULL; vnp = vnp->next) {
11886     if (vnp->data.intvalue == specific) {
11887       already_has_sv = TRUE;
11888     }
11889   }
11890   if (!already_has_sv) {
11891     vnp_new = ValNodeNew (NULL);
11892     vnp_new->choice = 0;
11893     vnp_new->data.intvalue = specific;
11894     vnp_new->next = *mod_list;
11895     *mod_list = vnp_new;
11896     rval = TRUE;
11897   }
11898   return rval;
11899 }
11900 
11901 
11902 //Not part of Autodef or Cleanup (no more popset retros)
UseOneSpecific(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available,Int4 specific)11903 static Boolean UseOneSpecific (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available, Int4 specific)
11904 {
11905   DefaultOrgOptions(odmp, mod_list);
11906   return AddOneAdjustmentMod (mod_list, available, specific);
11907 }
11908 
11909 
11910 //Not part of Autodef or Cleanup (no more popset retros)
UseStrain(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11911 static Boolean UseStrain (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11912 {
11913   return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Strain);
11914 }
11915 
11916 
11917 //Not part of Autodef or Cleanup (no more popset retros)
UseSpecimenVoucher(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11918 static Boolean UseSpecimenVoucher (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11919 {
11920   return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Specimen_voucher);
11921 }
11922 
11923 
11924 //Not part of Autodef or Cleanup (no more popset retros)
UseHaplotype(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11925 static Boolean UseHaplotype (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11926 {
11927   return UseOneSpecific (odmp, mod_list, available, DEFLINE_POS_Haplotype);
11928 }
11929 
11930 
11931 //Not part of Autodef or Cleanup (no more popset retros)
UseAutoDefId(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11932 static Boolean UseAutoDefId (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11933 {
11934   size_t index;
11935   ValNodePtr vnp;
11936   Boolean found, changed = FALSE;
11937   ModifierItemLocalPtr cpy;
11938 
11939   if (mod_list == NULL) {
11940     return FALSE;
11941   }
11942 
11943   cpy = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData));
11944   if (cpy == NULL) return FALSE;
11945   MemCpy (cpy, available, NumDefLineModifiers () * sizeof (ModifierItemLocalData));
11946 
11947   DefaultOrgOptions(odmp, mod_list);
11948   SetAutoDefIDModifiers (cpy);
11949   for (index = 0; index < NumDefLineModifiers(); index++) {
11950     if (cpy[index].required) {
11951       found = FALSE;
11952       if (*mod_list != NULL) {
11953         for (vnp = *mod_list; vnp != NULL && !found; vnp = vnp->next) {
11954           if (vnp->data.intvalue == index) {
11955             found = TRUE;
11956           }
11957         }
11958       }
11959       if (!found) {
11960         ValNodeAddInt (mod_list, 0, index);
11961         changed = TRUE;
11962       }
11963     }
11964   }
11965   cpy = MemFree (cpy);
11966 
11967   return changed;
11968 }
11969 
11970 
11971 //Not part of Autodef or Cleanup (no more popset retros)
DontExcludeSp(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11972 static Boolean DontExcludeSp (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11973 {
11974   if (odmp == NULL || !odmp->exclude_sp) {
11975     return FALSE;
11976   }
11977   DefaultOrgOptions(odmp, mod_list);
11978   odmp->exclude_sp = FALSE;
11979   return TRUE;
11980 }
11981 
11982 
11983 //Not part of Autodef or Cleanup (no more popset retros)
UseCountryAndIsolate(OrganismDescriptionModifiersPtr odmp,ValNodePtr PNTR mod_list,ModifierItemLocalPtr available)11984 static Boolean UseCountryAndIsolate (OrganismDescriptionModifiersPtr odmp, ValNodePtr PNTR mod_list, ModifierItemLocalPtr available)
11985 {
11986   Boolean add_country, add_isolate;
11987   Boolean rval = FALSE;
11988 
11989   DefaultOrgOptions(odmp, mod_list);
11990   add_country =  AddOneAdjustmentMod (mod_list, available, DEFLINE_POS_Country);
11991   add_isolate = AddOneAdjustmentMod (mod_list, available, DEFLINE_POS_Isolate);
11992   if (add_country || add_isolate) {
11993     rval = TRUE;
11994   }
11995   return rval;
11996 }
11997 
11998 
11999 //Not part of Autodef or Cleanup (no more popset retros)
/* NULL-terminated table of organism-modifier adjustment callbacks.
 * MakeFeatureRequestsMatchExpectedTitle walks the entries in this order,
 * calling each one as (odmp, &mod_list, modList) and building a candidate
 * definition line whenever the callback returns TRUE.
 */
static Nlm_SetOrgModifiersProc OrgModifiersSetList[] = {
  IsolateInsteadOfClone,
  UseNone,
  UseStrain,
  UseSpecimenVoucher,
  UseHaplotype,
  HaplotypeInsteadOfVoucher,
  VoucherInsteadOfIsolate,
  UseAutoDefId,
  DontExcludeSp,
  UseCountryAndIsolate,
  NULL };
12012 
12013 
/* Signature of a defline comparison callback: takes two strings and returns
 * a Boolean.  DeflinesMatch invokes these as (old_str, new_str).
 */
typedef Boolean (*Nlm_CompareDeflinesProc) PROTO ((CharPtr, CharPtr));
12015 
12016 //Not part of Autodef or Cleanup (no more popset retros)
MatchExceptOldProduct(CharPtr old_str,CharPtr new_str)12017 static Boolean MatchExceptOldProduct (CharPtr old_str, CharPtr new_str)
12018 {
12019   Int4 old_len, new_len, pattern_len, new_pattern_len, organelle_len, i;
12020   CharPtr pattern_fmt = "%s %s gene%s for %s product%s.";
12021   CharPtr other_pattern_fmt = "%s %s gene%s for %s RNA.";
12022   CharPtr new_pattern_fmt = "; %s.";
12023   CharPtr pattern, new_pattern;
12024   Boolean rval = FALSE;
12025   CharPtr delimiters[] = {";", ",", NULL};
12026   CharPtr plural[] = {"", "s", NULL};
12027   Int4 j, k;
12028 
12029   old_len = StringLen (old_str);
12030   new_len = StringLen (new_str);
12031 
12032   for (i = 2; organelleByGenome[i] != NULL && !rval; i++) {
12033     if (StringHasNoText (organelleByGenome[i])) {
12034       continue;
12035     }
12036     organelle_len = StringLen (organelleByGenome[i]);
12037     new_pattern = (CharPtr) MemNew (sizeof (Char) * (StringLen (new_pattern_fmt) + organelle_len));
12038     sprintf (new_pattern, new_pattern_fmt, organelleByGenome[i]);
12039     new_pattern_len = StringLen (new_pattern);
12040 
12041     if (new_len > new_pattern_len
12042         && StringCmp (new_str + new_len - new_pattern_len, new_pattern) == 0) {
12043       pattern = (CharPtr) MemNew (sizeof (Char) * (StringLen (pattern_fmt) + (2 * organelle_len)));
12044       for (j = 0; delimiters[j] != NULL; j++) {
12045         for (k = 0; plural[k] != NULL; k++) {
12046           /* first pattern */
12047           sprintf (pattern, pattern_fmt, delimiters[j], organelleByGenome[i], plural[k], organelleByGenome[i], plural[k]);
12048           pattern_len = StringLen (pattern);
12049           if (old_len > pattern_len && new_len > organelle_len + 2
12050               && StringCmp (old_str + old_len - pattern_len, pattern) == 0) {
12051             rval = TRUE;
12052           }
12053           /* other pattern */
12054           sprintf (pattern, other_pattern_fmt, delimiters[j], organelleByGenome[i], plural[k], organelleByGenome[i]);
12055           pattern_len = StringLen (pattern);
12056           if (old_len > pattern_len && new_len > organelle_len + 2
12057               && StringCmp (old_str + old_len - pattern_len, pattern) == 0) {
12058             rval = TRUE;
12059           }
12060         }
12061       }
12062 
12063       pattern = MemFree (pattern);
12064     }
12065     new_pattern = MemFree (new_pattern);
12066   }
12067   return rval;
12068 }
12069 
12070 
12071 //Not part of Autodef or Cleanup (no more popset retros)
NoSequenceForIntron(CharPtr old_str,CharPtr new_str)12072 static Boolean NoSequenceForIntron (CharPtr old_str, CharPtr new_str)
12073 {
12074   Int4 old_len, new_len;
12075 
12076   old_len = StringLen (old_str);
12077   new_len = StringLen (new_str);
12078 
12079   if (new_len < old_len && StringNCmp (old_str, new_str, new_len - 1) == 0
12080       && (StringCmp (old_str + new_len - 7, "intron, partial sequence.") == 0
12081           || StringCmp (old_str + new_len - 7, "intron, complete sequence.") == 0)) {
12082     return TRUE;
12083   } else {
12084     return FALSE;
12085   }
12086 
12087 }
12088 
12089 
12090 //Not part of Autodef or Cleanup (no more popset retros)
SkipPair(CharPtr PNTR a,CharPtr PNTR b,CharPtr a_start,CharPtr b_start,CharPtr val1,CharPtr val2)12091 static Boolean SkipPair (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start, CharPtr b_start, CharPtr val1, CharPtr val2)
12092 {
12093   Int4 len1, len2;
12094   Char end1, end2;
12095   Boolean rval = FALSE;
12096 
12097   if (a == NULL || *a == NULL
12098       || b == NULL || *b == NULL
12099       || a_start == NULL || b_start == NULL
12100       || val1 == NULL || val2 == NULL) {
12101     return FALSE;
12102   }
12103   len1 = StringLen (val1);
12104   len2 = StringLen (val2);
12105 
12106   end1 = *(val1 + len1 - 1);
12107   end2 = *(val2 + len2 - 1);
12108 
12109   if (**a == end1 && **b == end2 && *a - a_start > len1 && *b - b_start > len2
12110             && StringNCmp (*a - len1 + 1, val1, len1) == 0
12111             && StringNCmp (*b - len2 + 1, val2, len2) == 0) {
12112     *a -= len1 - 1;
12113     *b -= len2 - 1;
12114     rval = TRUE;
12115   } else if (**a == end2 && **b == end1 && *a - a_start > len2 && *b - b_start > len2
12116               && StringNCmp (*a - len2 + 1, val2, len2) == 0
12117               && StringNCmp (*b - len1 + 1, val1, len1) == 0) {
12118     *a -= len2 - 1;
12119     *b -= len1 - 1;
12120     rval = TRUE;
12121   }
12122   return rval;
12123 }
12124 
12125 
12126 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForSpace(CharPtr PNTR a,CharPtr PNTR b,CharPtr a_start,CharPtr b_start)12127 static Boolean AdjustForSpace (CharPtr PNTR a, CharPtr PNTR b, CharPtr a_start, CharPtr b_start)
12128 {
12129   Boolean rval = FALSE;
12130 
12131   if (a == NULL || *a == NULL
12132       || b == NULL || *b == NULL) {
12133     rval = FALSE;
12134   } else if (**a == ' ' && *a > a_start && *((*a) - 1) == **b) {
12135     (*a)--;
12136     rval = TRUE;
12137   } else if (**b == ' ' && *b > b_start && *((*b) - 1) == **a) {
12138     (*b)--;
12139     rval = TRUE;
12140   }
12141   return rval;
12142 }
12143 
12144 
12145 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForCharBeforePhrase(CharPtr PNTR a,CharPtr PNTR b,CharPtr phrase,Char ch)12146 static Boolean AdjustForCharBeforePhrase (CharPtr PNTR a, CharPtr PNTR b, CharPtr phrase, Char ch)
12147 {
12148   Boolean rval = FALSE;
12149   Int4    phrase_len = StringLen (phrase);
12150 
12151   if (a == NULL || *a == NULL
12152       || b == NULL || *b == NULL) {
12153     rval = FALSE;
12154   } else if (**a == ch && StringNCmp ((*a) + 1, phrase, phrase_len) == 0
12155              && **b != ch && StringNCmp ((*b) + 1, phrase, phrase_len) == 0) {
12156     (*a)--;
12157     rval = TRUE;
12158   } else if (**b == ch && StringNCmp ((*b) + 1, phrase, phrase_len) == 0
12159              && **a != ch && StringNCmp ((*a) + 1, phrase, phrase_len) == 0) {
12160     (*b)--;
12161     rval = TRUE;
12162   }
12163   return rval;
12164 }
12165 
12166 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForCommaBeforePhrase(CharPtr PNTR a,CharPtr PNTR b,CharPtr phrase)12167 static Boolean AdjustForCommaBeforePhrase (CharPtr PNTR a, CharPtr PNTR b, CharPtr phrase)
12168 {
12169   return AdjustForCharBeforePhrase (a, b, phrase, ',');
12170 }
12171 
12172 
12173 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForCommaBeforeAnd(CharPtr PNTR a,CharPtr PNTR b)12174 static Boolean AdjustForCommaBeforeAnd (CharPtr PNTR a, CharPtr PNTR b)
12175 {
12176   return AdjustForCommaBeforePhrase (a, b, " and ");
12177 }
12178 
12179 
12180 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForSkippableWord(CharPtr PNTR a,CharPtr PNTR b,CharPtr str1,CharPtr str2,CharPtr word)12181 static Boolean AdjustForSkippableWord (CharPtr PNTR a, CharPtr PNTR b, CharPtr str1, CharPtr str2, CharPtr word)
12182 {
12183   Int4 len;
12184   Boolean rval = FALSE;
12185 
12186   if (a == NULL || *a == NULL || b == NULL || *b == NULL) {
12187     return FALSE;
12188   }
12189   len = StringLen (word);
12190   if (*a - str1 > len && StringNCmp (*a - len + 1, word, len) == 0) {
12191     *a -= len - 1;
12192     *b += 1;
12193     rval = TRUE;
12194   }
12195   if (*b - str2 > len && StringNCmp (*b - len + 1, word, len) == 0) {
12196     *b -= len - 1;
12197     *a += 1;
12198     rval = TRUE;
12199   }
12200   return rval;
12201 }
12202 
12203 
12204 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForKnownDiffs(CharPtr PNTR a,CharPtr PNTR b,CharPtr str1,CharPtr str2)12205 static Boolean AdjustForKnownDiffs (CharPtr PNTR a, CharPtr PNTR b, CharPtr str1, CharPtr str2)
12206 {
12207   Boolean rval = SkipPair (a, b, str1, str2, " pseudogene, partial sequence", " gene, partial cds")
12208                 || SkipPair (a, b, str1, str2, " pseudogene, complete sequence", " gene, partial cds")
12209                 || SkipPair (a, b, str1, str2, " pseudogene, partial sequence", " gene, complete cds")
12210                 || SkipPair (a, b, str1, str2, " pseudogene, complete sequence", " gene, complete cds")
12211                 || SkipPair (a, b, str1, str2, " partial", " complete")
12212                 || SkipPair (a, b, str1, str2, "cds", "sequence")
12213                 || SkipPair (a, b, str1, str2, "gene, partial sequence;", "gene")
12214                 || SkipPair (a, b, str1, str2, "gene, complete sequence;", "gene")
12215                 || SkipPair (a, b, str1, str2, "spacer, partial sequence;", "spacer")
12216                 || SkipPair (a, b, str1, str2, "spacer, complete sequence;", "spacer")
12217                 || SkipPair (a, b, str1, str2, "gene", "genes")
12218                 || AdjustForSkippableWord (a, b, str1, str2, "-like")
12219                 || AdjustForSkippableWord (a, b, str1, str2, " truncated")
12220                 || AdjustForSkippableWord (a, b, str1, str2, " nonfunctional")
12221                 || AdjustForSkippableWord (a, b, str1, str2, " pseudo")
12222                 || AdjustForSkippableWord (a, b, str1, str2, " and")
12223                 || AdjustForSkippableWord (a, b, str1, str2, " partial sequence")
12224                 || AdjustForSkippableWord (a, b, str1, str2, " partial cds")
12225                 || AdjustForSkippableWord (a, b, str1, str2, " complete sequence")
12226                 || AdjustForSkippableWord (a, b, str1, str2, " complete cds")
12227                 || AdjustForSkippableWord (a, b, str1, str2, " gene")
12228                 || AdjustForSkippableWord (a, b, str1, str2, " genes")
12229                 || AdjustForSkippableWord (a, b, str1, str2, " pseudogene")
12230                 || AdjustForSkippableWord (a, b, str1, str2, " pseudogenes")
12231                 || AdjustForSkippableWord (a, b, str1, str2, ";")
12232                 || AdjustForSkippableWord (a, b, str1, str2, ",")
12233                 || AdjustForSpace (a, b, str1, str2);
12234   return rval;
12235 }
12236 
12237 
12238 //Not part of Autodef or Cleanup (no more popset retros)
AdjustForCommaBeforeGenomicSequence(CharPtr PNTR a,CharPtr PNTR b)12239 static Boolean AdjustForCommaBeforeGenomicSequence (CharPtr PNTR a, CharPtr PNTR b)
12240 {
12241   return AdjustForCommaBeforePhrase (a, b, " genomic sequence");
12242 }
12243 
12244 
/* NULL-terminated list of words (each with its leading space) that SkipWord
 * allows to be present in one definition line but absent from the other.
 */
static CharPtr defline_skippable_words[] = {
  " transposon",
  " mitochondrial",
  NULL};
12249 
12250 
12251 //Not part of Autodef or Cleanup (no more popset retros)
SkipWord(CharPtr PNTR a,CharPtr a_start,CharPtr PNTR b)12252 static Boolean SkipWord (CharPtr PNTR a, CharPtr a_start, CharPtr PNTR b)
12253 {
12254   Int4 index, len;
12255   Boolean rval = FALSE;
12256 
12257   if (a == NULL || *a == NULL || *a <= a_start || b == NULL || *b == NULL) {
12258      return FALSE;
12259   }
12260 
12261   for (index = 0; defline_skippable_words[index] != NULL && !rval; index++) {
12262     len = StringLen (defline_skippable_words[index]);
12263     if (*a - a_start >= len && StringNCmp (*a - len + 1, defline_skippable_words[index], len) == 0) {
12264       *a -= len - 1;
12265       (*b)++;
12266       rval = TRUE;
12267     }
12268   }
12269   return rval;
12270 }
12271 
12272 
12273 //Not part of Autodef or Cleanup (no more popset retros)
MatchWithPhraseExceptions(CharPtr str1,CharPtr str2)12274 static Boolean MatchWithPhraseExceptions (CharPtr str1, CharPtr str2)
12275 {
12276   Int4 len_curr, len_new;
12277   CharPtr a = NULL, b = NULL;
12278   CharPtr mitochondrial = "; mitochondrial";
12279   Int4 len_mito = StringLen (mitochondrial);
12280 
12281   len_curr = StringLen (str1);
12282   len_new = StringLen (str2);
12283   b = str2;
12284   if (len_curr > 0) {
12285     a = str1 + len_curr - 1;
12286     b = str2 + len_new - 1;
12287     if (*a == '.') {
12288       a--;
12289     }
12290     if (*b == '.') {
12291       b--;
12292     }
12293     /* skip trailing "mitochondrial" */
12294     if (a - str1 > len_mito && StringNCmp (a - len_mito + 1, mitochondrial, len_mito) == 0) {
12295       a -= len_mito;
12296     }
12297     if (b - str2 > len_mito && StringNCmp (b - len_mito + 1, mitochondrial, len_mito) == 0) {
12298       b -= len_mito;
12299     }
12300 
12301     while (a >= str1 && b >= str2
12302             && (toupper (*a) == toupper(*b)
12303                 || SkipPair (&a, &b, str1, str2, " gene", " genes")
12304                 || SkipPair (&a, &b, str1, str2, "; and", ";")
12305                 || AdjustForCommaBeforeAnd (&a, &b)
12306                 || AdjustForCommaBeforeGenomicSequence (&a, &b)
12307                 || AdjustForSpace (&a, &b, str1, str2)
12308                 || SkipWord (&a, str1, &b)
12309                 || SkipWord (&b, str2, &a))) {
12310       a--;
12311       b--;
12312     }
12313   }
12314   if (a + 1 == str1 && b + 1 == str2) {
12315     return TRUE;
12316   } else {
12317     return FALSE;
12318   }
12319 }
12320 
12321 //Not part of Autodef or Cleanup (no more popset retros)
/* NULL-terminated list of fuzzy comparison callbacks.  DeflinesMatch tries
 * them in this order, after an exact StringCmp, and stops at the first one
 * that reports a match.
 */
static Nlm_CompareDeflinesProc CompareDeflinesList[] = {
  MatchExceptOldProduct,
  NoSequenceForIntron,
  MatchWithPhraseExceptions,
  NULL };
12327 
12328 
12329 
12330 //Not part of Autodef or Cleanup (no more popset retros)
DeflinesMatch(CharPtr old_str,CharPtr new_str)12331 static Boolean DeflinesMatch (CharPtr old_str, CharPtr new_str)
12332 {
12333   Int4 index;
12334   Boolean match = FALSE;
12335 
12336   if (StringCmp (old_str, new_str) == 0) {
12337     return TRUE;
12338   }
12339 
12340   for (index = 0; CompareDeflinesList[index] != NULL && !match; index++) {
12341     match = CompareDeflinesList[index](old_str, new_str);
12342   }
12343   return match;
12344 }
12345 
12346 
12347 //Not part of Autodef or Cleanup (no more popset retros)
/* Rewrites *old_title so that any old organism name is replaced by the
 * current taxname.
 *
 * Uses the first source descriptor found for bsp.  For every OrgMod of
 * subtype ORGMOD_old_name, FindReplaceString is asked to replace its subname
 * with biop->org->taxname inside *old_title.  Does nothing when old_title is
 * NULL or the descriptor lacks a BioSource, org or orgname.
 */
static void ReplaceOldName (BioseqPtr bsp, CharPtr PNTR old_title)
{
  SeqMgrDescContext  dcontext;
  SeqDescPtr         src_desc;
  BioSourcePtr       biop;
  OrgModPtr          mod;

  if (old_title == NULL) {
    return;
  }

  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
  if (src_desc == NULL) {
    return;
  }

  biop = (BioSourcePtr) src_desc->data.ptrvalue;
  if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
    return;
  }

  for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
    if (mod->subtype != ORGMOD_old_name) {
      continue;
    }
    FindReplaceString (old_title, mod->subname, biop->org->taxname, TRUE, TRUE);
  }
}
12370 
12371 
12372 //Not part of Autodef or Cleanup (no more popset retros)
/* Ensures *old_title ends with a period.
 *
 * When the last character is not '.', a new string with a '.' appended is
 * allocated, the old string is freed and *old_title is pointed at the new
 * one.  An empty title is treated as lacking a period and becomes ".".
 *
 * Fixes:
 *  - an empty title no longer reads (*old_title)[-1];
 *  - if the allocation fails the title is left unchanged instead of
 *    formatting into a NULL buffer.
 */
static void AddMissingPeriod (CharPtr PNTR old_title)
{
  Int4 len;
  CharPtr new_title;

  if (old_title == NULL || *old_title == NULL) {
    return;
  }

  len = StringLen (*old_title);
  if (len > 0 && (*old_title)[len - 1] == '.') {
    return;
  }

  /* room for the original text, the added '.', and the terminator */
  new_title = (CharPtr) MemNew (sizeof (Char) * (len + 2));
  if (new_title == NULL) {
    return;
  }
  sprintf (new_title, "%s.", *old_title);
  *old_title = MemFree (*old_title);
  *old_title = new_title;
}
12390 
12391 
12392 //Not part of Autodef or Cleanup (no more popset retros)
/* Rewrites an old-style exon list in place: in a title containing
 * "exons X and Y and partial cds." (or "... and complete cds."), the first
 * " and" is replaced by "," giving "exons X, Y and partial cds.".
 *
 * The title is only changed when "exons " is followed by an " and " and the
 * next " and " after that starts exactly " and partial cds." or
 * " and complete cds." through the end of the string.
 */
static void ChangeExonList (CharPtr PNTR old_title)
{
  CharPtr exons, and1, and2;
  CharPtr from, to;

  if (old_title == NULL || StringHasNoText (*old_title)) {
    return;
  }
  exons = StringStr (*old_title, "exons ");
  if (exons == NULL) {
    return;
  }
  and1 = StringStr (exons, " and ");
  if (and1 == NULL) {
    return;
  }
  and2 = StringStr (and1 + 1, " and ");
  if (and2 == NULL) {
    return;
  }

  if (StringCmp (and2, " and partial cds.") != 0 && StringCmp (and2, " and complete cds.") != 0) {
    return;
  }

  /* overwrite the leading space with ',' and close the 3-character gap left
   * by "and"; the copy resumes at the space that followed it */
  *and1 = ',';
  to = and1 + 1;
  for (from = and1 + 4; *from != 0; from++) {
    *to = *from;
    to++;
  }
  *to = 0;
}
12419 
12420 
12421 //Not part of Autodef or Cleanup (no more popset retros)
MakeFeatureRequestsMatchExpectedTitle(BioseqPtr bsp)12422 NLM_EXTERN DefLineClauseOptionsPtr MakeFeatureRequestsMatchExpectedTitle (BioseqPtr bsp)
12423 {
12424   SeqEntryPtr                   sep;
12425   ValNodePtr                    defline_clauses = NULL;
12426   size_t                        index;
12427   Int4                          mod_index;
12428   ValNodePtr                    best_modifier_indices, default_modifier_indices, modifier_indices = NULL, tmp_mod_list;
12429   ValNodePtr                    vnp;
12430   ModifierItemLocalPtr          modList;
12431   OrganismDescriptionModifiers  odmp;
12432   SeqEntryPtr                   oldscope;
12433   CharPtr                       clause_list = NULL, attempt = NULL, old_title;
12434   DefLineFeatClausePtr          df;
12435   DefLineClauseOptionsPtr       clause_options;
12436   SeqDescPtr                    sdp;
12437   SeqMgrDescContext             dcontext;
12438   Boolean                       found_match = FALSE;
12439   Boolean                       default_exclude_sp;
12440 
12441   if (bsp == NULL) {
12442     return NULL;
12443   }
12444   sep = GetTopSeqEntryForEntityID (bsp->idx.entityID);
12445   if (sep == NULL) return NULL;
12446 
12447   modList = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData));
12448   if (modList == NULL) return NULL;
12449 
12450   sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_title, &dcontext);
12451   if (sdp == NULL || (old_title = (CharPtr) sdp->data.ptrvalue) == NULL) {
12452     return NULL;
12453   }
12454 
12455   old_title = StringSave (old_title);
12456   /* replacements for old RNA style */
12457   FindReplaceString (&old_title, "RNA gene, mitochondrial gene for mitochondrial RNA, partial sequence", "RNA gene, partial sequence; mitochondrial", TRUE, TRUE);
12458   FindReplaceString (&old_title, "16S ribosomal RNA, complete sequence", "16S ribosomal RNA gene, complete sequence", TRUE, TRUE);
12459   FindReplaceString (&old_title, "16S ribosomal RNA, partial sequence", "16S ribosomal RNA gene, partial sequence", TRUE, TRUE);
12460   FindReplaceString (&old_title, "subunit ribosomal RNA, complete sequence", "subunit ribosomal RNA gene, complete sequence", TRUE, TRUE);
12461   FindReplaceString (&old_title, "subunit ribosomal RNA, partial sequence", "subunit ribosomal RNA gene, partial sequence", TRUE, TRUE);
12462   FindReplaceString (&old_title, "specimen-voucher", "voucher", TRUE, TRUE);
12463 
12464   ReplaceOldName (bsp, &old_title);
12465   AddMissingPeriod (&old_title);
12466   ChangeExonList (&old_title);
12467 
12468   SetRequiredModifiers (modList);
12469   CountModifiers (modList, sep);
12470 
12471   InitOrganismDescriptionModifiers (&odmp, sep);
12472   default_exclude_sp = odmp.exclude_sp;
12473   odmp.use_modifiers = TRUE;
12474 
12475   oldscope = SeqEntrySetScope (sep);
12476 
12477   clause_options = DefLineClauseOptionsNew();
12478 
12479   best_modifier_indices = FindBestModifiersEx (sep, modList, TRUE);
12480   default_modifier_indices = GetModifierIndicesFromModList (modList);
12481 
12482   /* start loop here */
12483   for (index = 0;
12484        ClauseOptionSetList[index] != NULL && !found_match;
12485        index++) {
12486     ClauseOptionSetList[index](clause_options);
12487     BuildDefLineFeatClauseList (sep, bsp->idx.entityID,
12488                                 &(clause_options->feature_requests),
12489                                 clause_options->product_flag,
12490                                 clause_options->alternate_splice_flag,
12491                                 clause_options->gene_cluster_opp_strand,
12492                                 &defline_clauses);
12493     if (AreFeatureClausesUnique (defline_clauses)) {
12494       modifier_indices = ValNodeDupIntList(default_modifier_indices);
12495     } else {
12496       modifier_indices = ValNodeDupIntList(best_modifier_indices);
12497     }
12498     for (vnp = defline_clauses; vnp != NULL; vnp = vnp->next) {
12499       df = (DefLineFeatClausePtr) vnp->data.ptrvalue;
12500       if (df->bsp == bsp) {
12501         clause_list = df->clauselist;
12502         break;
12503       }
12504     }
12505 
12506     attempt = BuildOneDefinitionLine (sep, bsp, clause_list, modList, modifier_indices, &odmp);
12507     if (DeflinesMatch(old_title, attempt)) {
12508       found_match = TRUE;
12509     }
12510     attempt = MemFree (attempt);
12511 
12512     if (!found_match) {
12513       odmp.use_labels = FALSE;
12514       attempt = BuildOneDefinitionLine (sep, bsp, clause_list, modList, modifier_indices, &odmp);
12515       if (DeflinesMatch(old_title, attempt)) {
12516         found_match = TRUE;
12517       }
12518       attempt = MemFree (attempt);
12519       odmp.use_labels = TRUE;
12520     }
12521 
12522     if (!found_match) {
12523       for (mod_index = 0; OrgModifiersSetList[mod_index] != NULL && !found_match; mod_index++) {
12524         tmp_mod_list = ValNodeDupIntList (modifier_indices);
12525         if (OrgModifiersSetList[mod_index](&odmp, &tmp_mod_list, modList)) {
12526           attempt = BuildOneDefinitionLine (sep, bsp, clause_list, modList, tmp_mod_list, &odmp);
12527           if (DeflinesMatch(old_title, attempt)) {
12528             found_match = TRUE;
12529           }
12530           attempt = MemFree (attempt);
12531           if (!found_match) {
12532             odmp.use_labels = FALSE;
12533             attempt = BuildOneDefinitionLine (sep, bsp, clause_list, modList, tmp_mod_list, &odmp);
12534             if (DeflinesMatch(old_title, attempt)) {
12535               found_match = TRUE;
12536             }
12537             attempt = MemFree (attempt);
12538             odmp.use_labels = TRUE;
12539           }
12540         }
12541         tmp_mod_list = ValNodeFree (tmp_mod_list);
12542         odmp.exclude_sp = default_exclude_sp;
12543       }
12544     }
12545 
12546     DefLineFeatClauseListFree (defline_clauses);
12547     defline_clauses = NULL;
12548     modifier_indices = ValNodeFree (modifier_indices);
12549   }
12550   /* end loop here */
12551   best_modifier_indices = ValNodeFree (best_modifier_indices);
12552   default_modifier_indices = ValNodeFree (default_modifier_indices);
12553 
12554   old_title = MemFree (old_title);
12555 
12556   if (modList != NULL) {
12557     for (index = 0; index < NumDefLineModifiers (); index++) {
12558       ValNodeFree (modList [index].values_seen);
12559     }
12560     MemFree (modList);
12561   }
12562 
12563   SeqEntrySetScope (oldscope);
12564 
12565   if (!found_match) {
12566     clause_options = DefLineClauseOptionsFree (clause_options);
12567   }
12568 
12569   return clause_options;
12570 }
12571 
12572 
12573 //Not part of Autodef or Cleanup (no more popset retros)
MatchlenForAutodef(CharPtr str1,CharPtr str2)12574 static Int4 MatchlenForAutodef (CharPtr str1, CharPtr str2)
12575 {
12576   Int4 len_curr, len_new;
12577   CharPtr a, b;
12578 
12579   len_curr = StringLen (str1);
12580   len_new = StringLen (str2);
12581   b = str2;
12582   if (len_curr > 0) {
12583     a = str1 + len_curr - 1;
12584     b = str2 + len_new - 1;
12585     if (*a == '.') {
12586       a--;
12587     }
12588     if (*b == '.') {
12589       b--;
12590     }
12591     if (a - str1 > 13 && StringNCmp (a - 12, "; chloroplast", 13) == 0) {
12592       a-= 13;
12593     }
12594     if (b - str2 > 13 && StringNCmp (b - 12, "; chloroplast", 13) == 0) {
12595       b -= 13;
12596     }
12597     while (a >= str1 && b >= str2
12598             && (*a == *b || AdjustForKnownDiffs(&a, &b, str1, str2))) {
12599       a--;
12600       b--;
12601     }
12602   }
12603   return len_new - (b - str2 + 1);
12604 }
12605 
12606 
12607 //Not part of Autodef or Cleanup (no more popset retros)
/* VisitBioseqs callback that narrows *data (a CharPtr PNTR) down to the
 * title suffix common to all nucleotide bioseqs visited so far.
 *
 * The first title seen is copied as-is.  Each later title is compared with
 * the current common text from the end (skipping a trailing '.' and
 * "; chloroplast", and differences accepted by AdjustForKnownDiffs); the
 * common text is then cut down, in place, to the part that matched, rounded
 * forward so that it does not begin in the middle of a word.
 *
 * Protein bioseqs and bioseqs without a title are ignored.
 *
 * Fixes:
 *  - an empty new title is no longer inspected at new_title[-1]; it simply
 *    matches nothing;
 *  - characters are cast to unsigned char before isspace.
 */
static void FindCommonTitleCallback (BioseqPtr bsp, Pointer data)
{
  Int4 len_curr, len_new;
  CharPtr new_title;
  CharPtr PNTR current_title;
  CharPtr a, b;
  CharPtr src, dst;
  SeqMgrDescContext context;
  SeqDescPtr sdp;

  if (bsp == NULL || ISA_aa (bsp->mol)) {
    return;
  }
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);

  if (sdp == NULL || sdp->choice != Seq_descr_title
      || (new_title = (CharPtr) sdp->data.ptrvalue) == NULL
      || (current_title = (CharPtr PNTR) data) == NULL) {
    return;
  }

  if (*current_title == NULL) {
    *current_title = StringSave (new_title);
    return;
  }

  len_curr = StringLen (*current_title);
  len_new = StringLen (new_title);
  if (len_curr < 1) {
    return;
  }

  a = (*current_title) + len_curr - 1;
  if (*a == '.') {
    a--;
  }
  /* 13 == length of "; chloroplast" */
  if (a - *current_title > 13 && StringNCmp (a - 12, "; chloroplast", 13) == 0) {
    a-= 13;
  }

  if (len_new > 0) {
    b = new_title + len_new - 1;
    if (*b == '.') {
      b--;
    }
    if (b - new_title > 13 && StringNCmp (b - 12, "; chloroplast", 13) == 0) {
      b -= 13;
    }
    while (a >= *current_title && b >= new_title
          && (*a == *b || AdjustForKnownDiffs(&a, &b, *current_title, new_title))) {
      a--;
      b--;
    }
  }

  /* don't copy part of a word */
  while (*(a + 1) != 0 && !isspace ((unsigned char) *(a + 1))) {
    a++;
  }
  /* shift the surviving suffix to the front of the buffer */
  if (a + 1 > *current_title) {
    src = a + 1;
    dst = *current_title;
    while (*src != 0) {
      *dst = *src;
      dst++;
      src++;
    }
    *dst = 0;
  }
}
12671 
/* State shared with VerifyCommonFeatureClauseCallback while a candidate
 * common clause is checked against every bioseq.
 */
typedef struct verifycommonfeatureclause {
  CharPtr common_clause;  /* candidate common title suffix; not freed by the callback */
  Boolean is_ok;          /* set to FALSE by the callback on the first bioseq that fails */
} VerifyCommonFeatureClauseData, PNTR VerifyCommonFeatureClausePtr;
12676 
12677 //Not part of Autodef or Cleanup (no more popset retros)
/* Debug helper: prints one tab-separated line to stdout with the taxname,
 * every OrgMod (subtype:subname) and every SubSource (subtype:name) of biop.
 *
 * Fix: a NULL biop, org or orgname, and NULL strings, are now tolerated.
 * Missing strings print as empty text and a NULL biop prints nothing;
 * previously these cases dereferenced NULL or passed NULL to "%s".
 */
static void PrintBioSource (BioSourcePtr biop)
{
  OrgModPtr mod;
  SubSourcePtr ssp;

  if (biop == NULL) {
    return;
  }

  printf ("Taxname: %s",
          (biop->org != NULL && biop->org->taxname != NULL) ? biop->org->taxname : "");
  if (biop->org != NULL && biop->org->orgname != NULL) {
    for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
      printf ("\tOrgMod%d:%s", mod->subtype, mod->subname != NULL ? mod->subname : "");
    }
  }
  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
    printf ("\tSubSource%d:%s", ssp->subtype, ssp->name != NULL ? ssp->name : "");
  }
  printf ("\n");
}
12692 
12693 
12694 //Not part of Autodef or Cleanup (no more popset retros)
/* VisitBioseqs callback that checks a candidate common clause against one
 * nucleotide bioseq.  data is a VerifyCommonFeatureClausePtr.
 *
 * The word of the title that immediately precedes the common clause is
 * extracted (a trailing comma is dropped) and must be found in the bioseq's
 * BioSource: first as the end of a value, then followed by ';' anywhere,
 * then followed by ':' in the country qualifier.  When all three fail,
 * v->is_ok is cleared and a diagnostic is printed to stdout.
 *
 * Bioseqs without a title or source descriptor are skipped, as is every
 * bioseq once v->is_ok is FALSE.
 *
 * Fixes:
 *  - the source descriptor is looked up before last_word is allocated, so
 *    last_word no longer leaks when there is no BioSource;
 *  - the trailing-comma test is made only when a word was found, so
 *    last_word_len can no longer become -1;
 *  - failed allocations of last_word or the string constraint return early.
 */
static void VerifyCommonFeatureClauseCallback (BioseqPtr bsp, Pointer data)
{
  VerifyCommonFeatureClausePtr v;
  SeqMgrDescContext context;
  SeqDescPtr sdp;
  CharPtr title;
  BioSourcePtr biop;
  Int4 common_clause_len, title_len, last_word_len = 0;
  SourceConstraint source_constraint;
  CharPtr last_word, cp;
  Char id_txt[100];

  if (bsp == NULL || ISA_aa (bsp->mol)
      || (v = (VerifyCommonFeatureClausePtr) data) == NULL
      || !v->is_ok) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);

  if (sdp == NULL || sdp->choice != Seq_descr_title || (title = (CharPtr) sdp->data.ptrvalue) == NULL) {
    return;
  }

  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);

  if (sdp == NULL || sdp->choice != Seq_descr_source || (biop = (BioSourcePtr) sdp->data.ptrvalue) == NULL) {
    return;
  }

  title_len = StringLen (title);
  common_clause_len = MatchlenForAutodef (v->common_clause, title);

  /* walk back from the character just before the common clause to the
   * preceding whitespace (or the start of the title), counting the word */
  cp = title + title_len - common_clause_len - 1;
  while (cp > title && !isspace ((unsigned char) *cp)) {
    --cp;
    last_word_len++;
  }
  if (last_word_len > 0 && *(cp + last_word_len) == ',') {
    last_word_len--;
  }

  /* note - allocate extra space, in case we need to try adding a semicolon */
  last_word = (CharPtr) MemNew (sizeof (Char) * (last_word_len + 2));
  if (last_word == NULL) {
    return;
  }
  StringNCpy (last_word, cp + 1, last_word_len);
  last_word[last_word_len] = 0;

  MemSet (&source_constraint, 0, sizeof (SourceConstraint));
  source_constraint.constraint = StringConstraintNew ();
  if (source_constraint.constraint == NULL) {
    last_word = MemFree (last_word);
    return;
  }
  /* last_word is handed to the constraint here and is not freed separately
   * below; presumably StringConstraintFree releases match_text - confirm */
  source_constraint.constraint->match_text = last_word;
  source_constraint.constraint->match_location = String_location_ends;
  source_constraint.constraint->case_sensitive = FALSE;
  source_constraint.constraint->whole_word = TRUE;
  source_constraint.constraint->not_present = FALSE;

  if (!DoesBiosourceMatchConstraint(biop, &source_constraint)) {
    StringCat (last_word, ";");
    source_constraint.constraint->match_location = String_location_contains;
    if (!DoesBiosourceMatchConstraint(biop, &source_constraint)) {
      /* try country with colon */
      last_word[last_word_len] = ':';
      source_constraint.constraint->match_location = String_location_contains;
      source_constraint.field1 = ValNodeNew (NULL);
      source_constraint.field1->choice = SourceQualChoice_textqual;
      source_constraint.field1->data.intvalue = Source_qual_country;
      if (!DoesBiosourceMatchConstraint(biop, &source_constraint)) {
        v->is_ok = FALSE;
        SeqIdWrite (bsp->id, id_txt, PRINTID_TEXTID_ACC_ONLY, sizeof (id_txt) - 1);
        printf ("For %s, common title fails, last word is %s\n", id_txt, last_word);
        PrintBioSource (biop);
      }
      source_constraint.field1 = ValNodeFree (source_constraint.field1);
    }
  }
  source_constraint.constraint = StringConstraintFree (source_constraint.constraint);
}
12770 
12771 
12772 //Not part of Autodef or Cleanup (no more popset retros)
GetCommonFeatureClause(SeqEntryPtr sep)12773 static CharPtr GetCommonFeatureClause (SeqEntryPtr sep)
12774 {
12775   CharPtr common_clause = NULL;
12776   VerifyCommonFeatureClauseData v;
12777   CharPtr src, dst;
12778 
12779   VisitBioseqsInSep (sep, &common_clause, FindCommonTitleCallback);
12780   if (StringHasNoText (common_clause)) {
12781     common_clause = MemFree (common_clause);
12782   } else {
12783     MemSet (&v, 0, sizeof (VerifyCommonFeatureClauseData));
12784     v.common_clause = common_clause;
12785     v.is_ok = TRUE;
12786     VisitBioseqsInSep (sep, &v, VerifyCommonFeatureClauseCallback);
12787     if (!v.is_ok) {
12788       if (StringNCmp (common_clause, " chloroplast", 12) == 0) {
12789         dst = common_clause;
12790         for (src = common_clause + 12; *src != 0; src++) {
12791           *dst = *src;
12792         }
12793         *dst = 0;
12794         v.is_ok = TRUE;
12795         VisitBioseqsInSep (sep, &v, VerifyCommonFeatureClauseCallback);
12796       }
12797     }
12798     if (!v.is_ok) {
12799       common_clause = MemFree (common_clause);
12800     }
12801   }
12802   return common_clause;
12803 }
12804 //LCOV_EXCL_STOP
12805 
12806 
BuildDefinitionLinesFromFeatureClauseLists(ValNodePtr list,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,OrganismDescriptionModifiersPtr odmp)12807 NLM_EXTERN void BuildDefinitionLinesFromFeatureClauseLists (
12808   ValNodePtr list,
12809   ModifierItemLocalPtr modList,
12810   ValNodePtr modifier_indices,
12811   OrganismDescriptionModifiersPtr odmp
12812 )
12813 {
12814   ValNodePtr vnp;
12815   DefLineFeatClausePtr defline_featclause;
12816   CharPtr    tmp_str;
12817 
12818   for (vnp = list; vnp != NULL; vnp = vnp->next)
12819   {
12820     if (vnp->data.ptrvalue != NULL)
12821     {
12822       defline_featclause = vnp->data.ptrvalue;
12823       tmp_str = BuildOneDefinitionLine (defline_featclause->sep, defline_featclause->bsp,
12824                                         defline_featclause->clauselist,
12825                                         modList, modifier_indices, odmp);
12826       ReplaceDefinitionLine (defline_featclause->sep, tmp_str);
12827     }
12828   }
12829 }
12830 
12831 //LCOV_EXCL_START
12832 //Not part of Autodef or Cleanup
BuildDefLinesFromFeatClauseListsForOneBsp(ValNodePtr list,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,OrganismDescriptionModifiersPtr odmp,BioseqPtr bsp)12833 NLM_EXTERN void BuildDefLinesFromFeatClauseListsForOneBsp (
12834   ValNodePtr list,
12835   ModifierItemLocalPtr modList,
12836   ValNodePtr modifier_indices,
12837   OrganismDescriptionModifiersPtr odmp,
12838   BioseqPtr bsp
12839 )
12840 {
12841   ValNodePtr vnp;
12842   DefLineFeatClausePtr defline_featclause;
12843   CharPtr    tmp_str;
12844 
12845   for (vnp = list; vnp != NULL; vnp = vnp->next)
12846   {
12847     if (vnp->data.ptrvalue != NULL)
12848     {
12849       defline_featclause = vnp->data.ptrvalue;
12850       if (defline_featclause == NULL) continue;
12851       if (defline_featclause->bsp != bsp) continue;
12852       tmp_str = BuildOneDefinitionLine (defline_featclause->sep, defline_featclause->bsp,
12853                                         defline_featclause->clauselist,
12854                                         modList, modifier_indices, odmp);
12855       ReplaceDefinitionLine (defline_featclause->sep, tmp_str);
12856     }
12857   }
12858 }
12859 
12860 
12861 /* This removes redundant titles on nuc-prot sets, which will not be
12862  * visible in the flat file if all sequences in the nuc-prot set have
12863  * their own title.
12864  */
12865 // Not actually called on nuc-prot sets during title regeneration
RemoveNucProtSetTitles(SeqEntryPtr sep)12866 NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep)
12867 {
12868   BioseqSetPtr bssp;
12869   SeqEntryPtr  this_sep;
12870   SeqDescrPtr  sdp, prev = NULL, sdp_next;
12871 
12872   if (sep == NULL || ! IS_Bioseq_set (sep))
12873   {
12874     return;
12875   }
12876   bssp = (BioseqSetPtr) sep->data.ptrvalue;
12877   if (bssp == NULL) return;
12878   for (this_sep = bssp->seq_set; this_sep != NULL; this_sep = this_sep->next)
12879   {
12880     RemoveNucProtSetTitles (this_sep);
12881   }
12882 
12883   if (bssp->_class != BioseqseqSet_class_nuc_prot)
12884   {
12885     return;
12886   }
12887   for (sdp = bssp->descr; sdp != NULL; sdp = sdp_next)
12888   {
12889     sdp_next = sdp->next;
12890     if (sdp->choice == Seq_descr_title)
12891     {
12892       if (prev == NULL)
12893       {
12894         bssp->descr = sdp->next;
12895       }
12896       else
12897       {
12898         prev->next = sdp->next;
12899       }
12900       sdp->next = NULL;
12901       SeqDescrFree (sdp);
12902     }
12903     else
12904     {
12905       prev = sdp;
12906     }
12907   }
12908 }
12909 //LCOV_EXCL_STOP
12910 
12911 
/* VisitBioseqsInSep callback: on amino-acid Bioseqs, flags every extended
 * title descriptor for deletion (idx.deleteme).  Nothing is freed here;
 * userdata is not used.
 */
static void ProtTitleRemoveProc (BioseqPtr bsp, Pointer userdata)

{
  SeqDescrPtr  desc;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  desc = bsp->descr;
  while (desc != NULL) {
    /* only extended nodes can be viewed as ObjValNode */
    if (desc->extended && desc->choice == Seq_descr_title) {
      ((ObjValNodePtr) desc)->idx.deleteme = TRUE;
    }
    desc = desc->next;
  }
}
12928 
RemoveProteinTitles(SeqEntryPtr sep)12929 NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep)
12930 
12931 {
12932   Uint2  entityID;
12933 
12934   if (sep == NULL) return;
12935   VisitBioseqsInSep (sep, NULL, ProtTitleRemoveProc);
12936   entityID = ObjMgrGetEntityIDForChoice (sep);
12937   DeleteMarkedObjects (entityID, 0, NULL);
12938 }
12939 
12940 //LCOV_EXCL_START
12941 //not used in autodef or cleanup
MRnaTitleRemoveProc(BioseqPtr bsp,Pointer userdata)12942 static void MRnaTitleRemoveProc (BioseqPtr bsp, Pointer userdata)
12943 
12944 {
12945   MolInfoPtr     mip;
12946   ObjValNodePtr  ovp;
12947   SeqDescrPtr    sdp;
12948 
12949   if (bsp == NULL) return;
12950   if (! ISA_na (bsp->mol)) return;
12951 
12952   sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
12953   if (sdp == NULL) return;
12954   mip = (MolInfoPtr) sdp->data.ptrvalue;
12955   if (mip == NULL || mip->biomol != MOLECULE_TYPE_MRNA) return;
12956 
12957   for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
12958     if (sdp->choice == Seq_descr_title && sdp->extended) {
12959       ovp = (ObjValNodePtr) sdp;
12960       ovp->idx.deleteme = TRUE;
12961     }
12962   }
12963 }
12964 
12965 //not used in autodef or cleanup
RemoveMRnaTitles(SeqEntryPtr sep)12966 NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep)
12967 
12968 {
12969   Uint2  entityID;
12970 
12971   if (sep == NULL) return;
12972   VisitBioseqsInSep (sep, NULL, MRnaTitleRemoveProc);
12973   entityID = ObjMgrGetEntityIDForChoice (sep);
12974   DeleteMarkedObjects (entityID, 0, NULL);
12975 }
12976 
12977 
/* Options bundle that AddPopsetTitles passes to AddPopsetCallback through
 * the visitor's data pointer.  The callback forwards all four members to
 * BuildDefLineFeatClauseList.
 */
typedef struct popsetdefline {
 DeflineFeatureRequestListPtr feature_requests; /* caller's request list; not copied */
 Int2 product_flag;
 Boolean alternate_splice_flag;
 Boolean gene_cluster_opp_strand;
} PopsetDeflineData, PNTR PopsetDeflinePtr;
12984 
12985 
12986 //Not regenerating popset titles
GetsDocsumTitle(Uint1 set_class)12987 NLM_EXTERN Boolean GetsDocsumTitle(Uint1 set_class)
12988 {
12989   if (set_class == BioseqseqSet_class_pop_set
12990       || set_class == BioseqseqSet_class_phy_set
12991       || set_class == BioseqseqSet_class_eco_set
12992       || set_class == BioseqseqSet_class_mut_set) {
12993     return TRUE;
12994   } else {
12995     return FALSE;
12996   }
12997 }
12998 
12999 //Not part of Autodef when not regenerating popset titles
HasTitle(SeqDescrPtr descr)13000 static Boolean HasTitle(SeqDescrPtr descr)
13001 {
13002   while (descr != NULL) {
13003     if (descr->choice == Seq_descr_title) {
13004       return TRUE;
13005     }
13006     descr = descr->next;
13007   }
13008   return FALSE;
13009 }
13010 
13011 
13012 //Not part of Autodef or Cleanup (no more popset retros)
AddPopsetDeflineWithClause(BioseqSetPtr bssp,CharPtr clause)13013 NLM_EXTERN void AddPopsetDeflineWithClause (BioseqSetPtr bssp, CharPtr clause)
13014 {
13015   SeqEntryPtr set_sep;
13016   OrgRefPtr   org;
13017   CharPtr     org_desc = NULL, keyword_prefix = NULL, tmp_str = NULL;
13018 
13019   set_sep = SeqMgrGetSeqEntryForData (bssp);
13020   org = GetCommonOrgRefForSeqEntry (set_sep);
13021   if (org == NULL || StringHasNoText (org->taxname)) {
13022     return;
13023   }
13024   org_desc = StringSave (org->taxname);
13025   org = OrgRefFree (org);
13026 
13027   keyword_prefix = GetKeywordPrefix (set_sep);
13028 
13029   tmp_str = (CharPtr) MemNew (StringLen (keyword_prefix)
13030                               + StringLen (org_desc)
13031                               + StringLen (clause) + 2);
13032   if (tmp_str == NULL) return;
13033   tmp_str [0] = 0;
13034   if (keyword_prefix != NULL)
13035   {
13036     StringCat (tmp_str, keyword_prefix);
13037   }
13038   StringCat (tmp_str, org_desc);
13039   if (clause != NULL
13040     && clause[0] != ','
13041     && clause[0] != '.'
13042     && clause[0] != ' '
13043     && clause[0] != 0)
13044   {
13045     StringCat (tmp_str, " ");
13046   }
13047   StringCat (tmp_str, clause);
13048   tmp_str [0] = toupper (tmp_str [0]);
13049   ReplaceDefinitionLine (set_sep, tmp_str);
13050   MemFree (org_desc);
13051 }
13052 
13053 
13054 //Not regenerating popset titles
AddPopsetCallback(BioseqSetPtr bssp,Pointer data)13055 static void AddPopsetCallback (BioseqSetPtr bssp, Pointer data)
13056 {
13057   SeqEntryPtr          set_sep, first_sep;
13058   OrgRefPtr            org;
13059   ValNodePtr           defline_clauses = NULL;
13060   DefLineFeatClausePtr defline_featclause;
13061   CharPtr              org_desc = NULL, keyword_prefix = NULL, tmp_str = NULL;
13062   PopsetDeflinePtr     pop;
13063 
13064   if (bssp == NULL
13065       || !GetsDocsumTitle(bssp->_class)
13066       || HasTitle(bssp->descr)
13067       || (pop = (PopsetDeflinePtr)data) == NULL) {
13068     return;
13069   }
13070 
13071   set_sep = SeqMgrGetSeqEntryForData (bssp);
13072   org = GetCommonOrgRefForSeqEntry (set_sep);
13073   if (org == NULL || StringHasNoText (org->taxname)) {
13074     return;
13075   }
13076   org_desc = StringSave (org->taxname);
13077   org = OrgRefFree (org);
13078 
13079   /* make title here */
13080   first_sep = bssp->seq_set;
13081 
13082   BuildDefLineFeatClauseList (first_sep, bssp->idx.entityID,
13083                               pop->feature_requests,
13084                               pop->product_flag, pop->alternate_splice_flag, pop->gene_cluster_opp_strand,
13085                               &defline_clauses);
13086 
13087   if (defline_clauses != NULL) {
13088     defline_featclause = defline_clauses->data.ptrvalue;
13089 
13090     keyword_prefix = GetKeywordPrefix (defline_featclause->sep);
13091 
13092     tmp_str = (CharPtr) MemNew (StringLen (keyword_prefix)
13093                                 + StringLen (org_desc)
13094                                 + StringLen (defline_featclause->clauselist) + 2);
13095     if (tmp_str == NULL) return;
13096     tmp_str [0] = 0;
13097     if (keyword_prefix != NULL)
13098     {
13099       StringCat (tmp_str, keyword_prefix);
13100     }
13101     StringCat (tmp_str, org_desc);
13102     if (defline_featclause->clauselist != NULL
13103       && defline_featclause->clauselist [0] != ','
13104       && defline_featclause->clauselist [0] != '.'
13105       && defline_featclause->clauselist [0] != 0)
13106     {
13107       StringCat (tmp_str, " ");
13108     }
13109     StringCat (tmp_str, defline_featclause->clauselist);
13110     tmp_str [0] = toupper (tmp_str [0]);
13111     ReplaceDefinitionLine (set_sep, tmp_str);
13112   }
13113   MemFree (org_desc);
13114 }
13115 
13116 //Not regenerating popset titles
AddPopsetTitles(SeqEntryPtr sep,DeflineFeatureRequestListPtr feature_requests,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand)13117 NLM_EXTERN void AddPopsetTitles
13118 (SeqEntryPtr sep,
13119  DeflineFeatureRequestListPtr feature_requests,
13120  Int2 product_flag,
13121  Boolean alternate_splice_flag,
13122  Boolean gene_cluster_opp_strand)
13123 {
13124   PopsetDeflineData pop;
13125 
13126   pop.feature_requests = feature_requests;
13127   /* forcibly suppress alleles in popset titles */
13128   pop.feature_requests->suppress_allele = TRUE;
13129   pop.product_flag = product_flag;
13130   pop.alternate_splice_flag = alternate_splice_flag;
13131   pop.gene_cluster_opp_strand = gene_cluster_opp_strand;
13132 
13133   VisitSetsInSep (sep, &pop, AddPopsetCallback);
13134 }
13135 
13136 //Not regenerating popset titles
RemovePopsetTitlesCallback(BioseqSetPtr bssp,Pointer data)13137 static void RemovePopsetTitlesCallback(BioseqSetPtr bssp, Pointer data)
13138 {
13139   SeqDescrPtr   sdp;
13140   ObjValNodePtr ovp;
13141 
13142   if (bssp == NULL || !GetsDocsumTitle(bssp->_class)) {
13143     return;
13144   }
13145   for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
13146     if (sdp->choice == Seq_descr_title
13147         && sdp->extended) {
13148       ovp = (ObjValNodePtr) sdp;
13149       ovp->idx.deleteme = TRUE;
13150     }
13151   }
13152 }
13153 
13154 //Not regenerating popset titles
RemovePopsetTitles(SeqEntryPtr sep)13155 NLM_EXTERN void RemovePopsetTitles(SeqEntryPtr sep)
13156 {
13157   Uint2 entityID;
13158 
13159   VisitSetsInSep (sep, NULL, RemovePopsetTitlesCallback);
13160   entityID = ObjMgrGetEntityIDForChoice (sep);
13161   DeleteMarkedObjects (entityID, 0, NULL);
13162 }
13163 //LCOV_EXCL_STOP
13164 
13165 
/* Names used in the AutodefOptions user object.
 * kAutoDefOptions is the type string of the object itself (see
 * IsAutoDefOptions and MakeAutoDefOptionsUserObject); the remaining
 * constants are field labels passed to LabelUserField by the Add*
 * functions below.  Not every label is written by the code in this
 * part of the file.
 */
const CharPtr kAutoDefOptions = "AutodefOptions";
const CharPtr kAltSpliceFlag = "AltSpliceFlag";
const CharPtr kDoNotApplyToAff = "DoNotApplyToAff";
const CharPtr kDoNotApplyToCf = "DoNotApplyToCf";
const CharPtr kDoNotApplyToNr = "DoNotApplyToNr";
const CharPtr kDoNotApplyToSp = "DoNotApplyToSp";
const CharPtr kFeatureListType = "FeatureListType";
const CharPtr kGeneClusterOppStrand = "GeneClusterOppStrand";
const CharPtr kHIVRule = "HIVRule";
const CharPtr kIncludeCountryText = "IncludeCountryText";
const CharPtr kKeep3UTRs = "Keep3UTRs";
const CharPtr kKeep5UTRs = "Keep5UTRs";
const CharPtr kKeepAfterSemicolon = "KeepAfterSemicolon";
const CharPtr kKeepExons = "KeepExons";
const CharPtr kKeepIntrons = "KeepIntrons";
const CharPtr kKeepLTRs = "KeepLTRs";
const CharPtr kKeepPromoters = "KeepPromoters";
const CharPtr kLeaveParenthetical = "LeaveParenthetical";
const CharPtr kMaxMods = "MaxMods";
const CharPtr kMiscFeatRule = "MiscFeatRule";
const CharPtr kModifierList = "ModifierList";
const CharPtr kProductFlag = "ProductFlag";
const CharPtr kNuclearCopyFlag = "NuclearCopyFlag";
const CharPtr kSpecifyNuclearProduct = "SpecifyNuclearProduct";
const CharPtr kSuppressedFeatures = "SuppressedFeatures";
const CharPtr kSuppressFeatureAltSplice = "SuppressFeatureAltSplice";
const CharPtr kSuppressLocusTags = "SuppressLocusTags";
const CharPtr kSuppressMobileElementSubfeatures = "SuppressMobileElementSubfeatures";
const CharPtr kUseFakePromoters = "UseFakePromoters";
const CharPtr kUseLabels = "UseLabels";
const CharPtr kUseNcRNAComment = "UseNcRNAComment";
const CharPtr kAllowModAtEndOfTaxname = "AllowModAtEndOfTaxname";
const CharPtr kKeepuOrf = "KeepuOrf";
const CharPtr kKeepMobileElement = "KeepMobileElement";
const CharPtr kKeepNoncodingProductFeat = "KeepNoncodingProductFeat";
const CharPtr kKeepPrecursorRNA = "KeepPrecursorRNA";
const CharPtr kKeepncRNA = "KeepncRNA";
const CharPtr kKeepRepeatRegion = "KeepRepeatRegion";
const CharPtr kSuppressAllele = "SuppressAllele";
13205 
/* String values stored in the fields named above. */

/* field values for the HIV rule (stored by AddHIVRule) */
const CharPtr kPreferClone = "PreferClone";
const CharPtr kPreferIsolate = "PreferIsolate";
const CharPtr kWantBoth = "WantBoth";
/* field values for the feature list type (stored by AddFeatureListType) */
const CharPtr kCompleteGenome = "Complete Genome";
const CharPtr kCompleteSequence = "Complete Sequence";
const CharPtr kListAllFeatures = "List All Features";
const CharPtr kPartialGenome = "Partial Genome";
const CharPtr kPartialSequence = "Partial Sequence";
const CharPtr kSequence = "Sequence";
/* field values for the misc feat rule (stored by AddMiscFeatParseRule) */
const CharPtr kCommentFeat = "CommentFeat";
const CharPtr kDelete = "Delete";
const CharPtr kNoncodingProductFeat = "NoncodingProductFeat";
13221 
IsAutoDefOptions(UserObjectPtr uop)13222 static Boolean IsAutoDefOptions(UserObjectPtr uop)
13223 {
13224     if (uop != NULL && uop->type != NULL &&
13225         StringICmp(uop->type->str, kAutoDefOptions) == 0) {
13226         return TRUE;
13227     } else {
13228         return FALSE;
13229     }
13230 }
13231 
13232 //LCOV_EXCL_START
13233 //not testing construction of autodef options object at this time
LabelUserField(UserFieldPtr ufp,CharPtr field_name)13234 void LabelUserField(UserFieldPtr ufp, CharPtr field_name)
13235 {
13236     ufp->label = ObjectIdNew();
13237     ufp->label->str = StringSave(field_name);
13238 }
13239 
13240 //not testing construction of autodef options object at this time
AddFieldToUserObject(UserFieldPtr ufp,UserObjectPtr uop)13241 void AddFieldToUserObject(UserFieldPtr ufp, UserObjectPtr uop)
13242 {
13243     ufp->next = uop->data;
13244     uop->data = ufp;
13245 }
13246 
13247 //not testing construction of autodef options object at this time
AddBooleanAutodefField(UserObjectPtr uop,CharPtr field_name)13248 void AddBooleanAutodefField(UserObjectPtr uop, CharPtr field_name)
13249 {
13250     UserFieldPtr ufp = UserFieldNew();
13251     LabelUserField(ufp, field_name);
13252     ufp->choice = 4;
13253     ufp->data.boolvalue = TRUE;
13254     AddFieldToUserObject(ufp, uop);
13255 }
13256 
13257 //not testing construction of autodef options object at this time
AddAutodefProductFlag(UserObjectPtr uop,Int2 product_flag)13258 void AddAutodefProductFlag(UserObjectPtr uop, Int2 product_flag)
13259 {
13260     UserFieldPtr ufp;
13261     CharPtr val;
13262 
13263     if (product_flag == DEFAULT_ORGANELLE_CLAUSE) {
13264         AddBooleanAutodefField(uop, kSpecifyNuclearProduct);
13265     } else if (product_flag > DEFAULT_ORGANELLE_CLAUSE) {
13266         val = organelleByPopup[product_flag - DEFAULT_ORGANELLE_CLAUSE];
13267         if (val != NULL) {
13268             ufp = UserFieldNew();
13269             LabelUserField(ufp, kNuclearCopyFlag);
13270             ufp->choice = 1;
13271             ufp->data.ptrvalue = StringSave(val);
13272             AddFieldToUserObject(ufp, uop);
13273         }
13274     } else {
13275         val = organelleByPopup[product_flag];
13276         if (val != NULL) {
13277             ufp = UserFieldNew();
13278             LabelUserField(ufp, kProductFlag);
13279             ufp->choice = 1;
13280             ufp->data.ptrvalue = StringSave(val);
13281             AddFieldToUserObject(ufp, uop);
13282         }
13283     }
13284 }
13285 
13286 //not testing construction of autodef options object at this time
AddHIVRule(UserObjectPtr uop,Int4 rule)13287 void AddHIVRule(UserObjectPtr uop, Int4 rule)
13288 {
13289     UserFieldPtr ufp;
13290 
13291     ufp = UserFieldNew();
13292     LabelUserField(ufp, kHIVRule);
13293     ufp->choice = 1;
13294     switch (rule) {
13295         case clone_isolate_HIV_rule_prefer_clone:
13296             ufp->data.ptrvalue = StringSave(kPreferClone);
13297             break;
13298         case clone_isolate_HIV_rule_prefer_isolate:
13299             ufp->data.ptrvalue = StringSave(kPreferIsolate);
13300             break;
13301         case clone_isolate_HIV_rule_want_both:
13302             ufp->data.ptrvalue = StringSave(kWantBoth);
13303             break;
13304         default:
13305             break;
13306     }
13307     AddFieldToUserObject(ufp, uop);
13308 }
13309 
13310 //not testing construction of autodef options object at this time
AddOrganismDescriptionModifiersToAutoDefUserObject(UserObjectPtr uop,OrganismDescriptionModifiersPtr odmp)13311 void AddOrganismDescriptionModifiersToAutoDefUserObject
13312 (UserObjectPtr uop,
13313  OrganismDescriptionModifiersPtr odmp)
13314 {
13315     UserFieldPtr ufp;
13316 
13317     if (odmp->use_labels) {
13318         AddBooleanAutodefField(uop, kUseLabels);
13319     }
13320     ufp = UserFieldNew();
13321     LabelUserField(ufp, kMaxMods);
13322     ufp->choice = 2;
13323     ufp->data.intvalue = odmp->max_mods;
13324     AddFieldToUserObject(ufp, uop);
13325     if (odmp->keep_paren) {
13326         AddBooleanAutodefField(uop, kLeaveParenthetical);
13327     }
13328     if (odmp->exclude_sp) {
13329         AddBooleanAutodefField(uop, kDoNotApplyToSp);
13330     }
13331     if (odmp->exclude_cf) {
13332         AddBooleanAutodefField(uop, kDoNotApplyToCf);
13333     }
13334     if (odmp->exclude_aff) {
13335         AddBooleanAutodefField(uop, kDoNotApplyToAff);
13336     }
13337     if (odmp->exclude_nr) {
13338         AddBooleanAutodefField(uop, kDoNotApplyToNr);
13339     }
13340     if (odmp->include_country_extra) {
13341         AddBooleanAutodefField(uop, kIncludeCountryText);
13342     }
13343     AddHIVRule(uop, odmp->clone_isolate_HIV_rule_num);
13344     if (odmp->allow_semicolon_in_modifier) {
13345         AddBooleanAutodefField(uop, kKeepAfterSemicolon);
13346     }
13347     if (odmp->allow_mod_at_end_of_taxname) {
13348         AddBooleanAutodefField(uop, kAllowModAtEndOfTaxname);
13349     }
13350 }
13351 
13352 
13353 //not testing construction of autodef options object at this time
AddFeatureListType(UserObjectPtr uop,Int4 rule)13354 void AddFeatureListType(UserObjectPtr uop, Int4 rule)
13355 {
13356     UserFieldPtr ufp;
13357 
13358     ufp = UserFieldNew();
13359     LabelUserField(ufp, kFeatureListType);
13360     ufp->choice = 1;
13361     switch (rule) {
13362     case DEFLINE_USE_FEATURES:
13363         ufp->data.ptrvalue = StringSave(kListAllFeatures);
13364         break;
13365     case DEFLINE_COMPLETE_GENOME:
13366         ufp->data.ptrvalue = StringSave(kCompleteGenome);
13367         break;
13368     case DEFLINE_COMPLETE_SEQUENCE:
13369         ufp->data.ptrvalue = StringSave(kCompleteSequence);
13370         break;
13371     case DEFLINE_SEQUENCE:
13372         ufp->data.ptrvalue = StringSave(kSequence);
13373         break;
13374     case DEFLINE_PARTIAL_GENOME:
13375         ufp->data.ptrvalue = StringSave(kPartialGenome);
13376         break;
13377     case DEFLINE_PARTIAL_SEQUENCE:
13378         ufp->data.ptrvalue = StringSave(kPartialSequence);
13379         break;
13380     default:
13381         break;
13382     }
13383     AddFieldToUserObject(ufp, uop);
13384 }
13385 
13386 
13387 //not testing construction of autodef options object at this time
AddMiscFeatParseRule(UserObjectPtr uop,Int4 misc_feat_parse_rule)13388 void AddMiscFeatParseRule(UserObjectPtr uop, Int4 misc_feat_parse_rule)
13389 {
13390     UserFieldPtr ufp;
13391 
13392     ufp = UserFieldNew();
13393     LabelUserField(ufp, kMiscFeatRule);
13394     ufp->choice = 1;
13395     switch (misc_feat_parse_rule) {
13396     case 1:
13397         ufp->data.ptrvalue = StringSave(kCommentFeat);
13398         break;
13399     case 2:
13400         ufp->data.ptrvalue = StringSave(kNoncodingProductFeat);
13401         break;
13402     case 3:
13403         ufp->data.ptrvalue = StringSave(kDelete);
13404         break;
13405     default:
13406         break;
13407     }
13408     AddFieldToUserObject(ufp, uop);
13409 }
13410 
13411 //not testing construction of autodef options object at this time
BuildStringsField(CharPtr field_name,ValNodePtr vals)13412 UserFieldPtr BuildStringsField(CharPtr field_name, ValNodePtr vals)
13413 {
13414     UserFieldPtr ufp;
13415     CharPtr PNTR cpp;
13416     ValNodePtr vnp;
13417     Int4 i;
13418 
13419     ufp = UserFieldNew();
13420     LabelUserField(ufp, field_name);
13421     ufp->choice = 7;
13422     ufp->num = ValNodeLen(vals);
13423     cpp = (CharPtr PNTR) MemNew(ufp->num * sizeof(CharPtr));
13424     for (i = 0, vnp = vals; vnp != NULL; vnp = vnp->next, i++) {
13425         cpp[i] = StringSave(vnp->data.ptrvalue);
13426     }
13427     ufp->data.ptrvalue = cpp;
13428     return ufp;
13429 }
13430 
13431 //not testing construction of autodef options object at this time
AddSuppressedFeatures(UserObjectPtr uop,ValNodePtr list)13432 void AddSuppressedFeatures(UserObjectPtr uop, ValNodePtr list)
13433 {
13434     UserFieldPtr ufp;
13435     ValNodePtr vnp, val_list = NULL;
13436     CharPtr val;
13437     Int4 num_unrecognized = 0;
13438 
13439     if (list == NULL) {
13440         return;
13441     }
13442     for (vnp = list; vnp != NULL; vnp = vnp->next) {
13443         val = GetFeatureNameFromFeatureType(GetFeatureTypeFromFeatdef(vnp->choice));
13444         if (StringICmp(val, "any") == 0) {
13445             num_unrecognized++;
13446         } else {
13447             ValNodeAddPointer(&val_list, 0, val);
13448         }
13449     }
13450     ufp = BuildStringsField(kSuppressedFeatures, val_list);
13451     val_list = ValNodeFree(val_list);
13452     AddFieldToUserObject(ufp, uop);
13453 }
13454 
13455 
13456 //not testing construction of autodef options object at this time
AddDeflineFeatureRequestListToAutoDefUserObject(UserObjectPtr uop,DeflineFeatureRequestListPtr rq)13457 void AddDeflineFeatureRequestListToAutoDefUserObject
13458 (UserObjectPtr uop,
13459 DeflineFeatureRequestListPtr rq)
13460 {
13461     if (rq->keep_items[RemovableExon]) {
13462         AddBooleanAutodefField(uop, kKeepExons);
13463     }
13464     if (rq->keep_items[RemovableIntron]) {
13465         AddBooleanAutodefField(uop, kKeepIntrons);
13466     }
13467     if (rq->keep_items[Removable5UTR]) {
13468         AddBooleanAutodefField(uop, kKeep5UTRs);
13469     }
13470     if (rq->keep_items[Removable3UTR]) {
13471         AddBooleanAutodefField(uop, kKeep3UTRs);
13472     }
13473     if (rq->keep_items[RemovablePromoter]) {
13474         AddBooleanAutodefField(uop, kKeepPromoters);
13475         if (rq->add_fake_promoters) {
13476             AddBooleanAutodefField(uop, kUseFakePromoters);
13477         }
13478     }
13479     if (rq->keep_items[RemovableLTR]) {
13480         AddBooleanAutodefField(uop, kKeepLTRs);
13481     }
13482     if (rq->keep_items[RemovableuORF]) {
13483         AddBooleanAutodefField(uop, kKeepuOrf);
13484     }
13485     if (rq->keep_items[RemovableNoncodingProductFeat]) {
13486         AddBooleanAutodefField(uop, kKeepNoncodingProductFeat);
13487     }
13488     if (rq->keep_items[RemovableMobileElement]) {
13489         AddBooleanAutodefField(uop, kKeepMobileElement);
13490     }
13491     if (rq->keep_items[RemovablePrecursorRNA]) {
13492         AddBooleanAutodefField(uop, kKeepPrecursorRNA);
13493     }
13494     if (rq->keep_items[RemovablencRNA]) {
13495         AddBooleanAutodefField(uop, kKeepncRNA);
13496     }
13497     if (rq->keep_items[RemovableRepeatRegion]) {
13498         AddBooleanAutodefField(uop, kKeepRepeatRegion);
13499     }
13500 
13501     if (rq->suppress_alt_splice_phrase) {
13502         AddBooleanAutodefField(uop, kSuppressFeatureAltSplice);
13503     }
13504     if (rq->remove_subfeatures) {
13505         AddBooleanAutodefField(uop, kSuppressMobileElementSubfeatures);
13506     }
13507     AddFeatureListType(uop, rq->feature_list_type);
13508     AddMiscFeatParseRule(uop, rq->misc_feat_parse_rule);
13509     if (rq->suppress_locus_tags) {
13510         AddBooleanAutodefField(uop, kSuppressLocusTags);
13511     }
13512     AddSuppressedFeatures(uop, rq->suppressed_feature_list);
13513 
13514     if (rq->use_ncrna_note) {
13515         AddBooleanAutodefField(uop, kUseNcRNAComment);
13516     }
13517 }
13518 
/* Labels of the two string-list fields nested inside the kModifierList
 * field (see AddModListToAutoDefUserObject). */
const CharPtr kSubSources = "SubSources";
const CharPtr kOrgMods = "OrgMods";
13521 
13522 //not testing construction of autodef options object at this time
AddModListToAutoDefUserObject(UserObjectPtr uop,ValNodePtr modifier_indices)13523 void AddModListToAutoDefUserObject(UserObjectPtr uop, ValNodePtr modifier_indices)
13524 {
13525     UserFieldPtr ufp, ufp_ss = NULL, ufp_mod = NULL;
13526     ValNodePtr ss_vals = NULL, mod_vals = NULL, vnp;
13527 
13528     for (vnp = modifier_indices; vnp != NULL; vnp = vnp->next) {
13529         if (DefLineModifiers[vnp->data.intvalue].isOrgMod) {
13530             ValNodeAddPointer(&mod_vals, 0, DefLineModifiers[vnp->data.intvalue].name);
13531         } else {
13532             ValNodeAddPointer(&ss_vals, 0, DefLineModifiers[vnp->data.intvalue].name);
13533         }
13534     }
13535     if (ss_vals != NULL) {
13536         ufp_ss = BuildStringsField(kSubSources, ss_vals);
13537         ss_vals = ValNodeFree(ss_vals);
13538     }
13539 
13540     if (mod_vals != NULL) {
13541         ufp_mod = BuildStringsField(kOrgMods, mod_vals);
13542         mod_vals = ValNodeFree(mod_vals);
13543     }
13544 
13545     if (ufp_ss != NULL || ufp_mod != NULL) {
13546         ufp = UserFieldNew();
13547         LabelUserField(ufp, kModifierList);
13548         ufp->choice = 11;
13549         if (ufp_ss != NULL) {
13550             ufp_ss->next = ufp_mod;
13551             ufp->data.ptrvalue = ufp_ss;
13552         } else {
13553             ufp->data.ptrvalue = ufp_mod;
13554         }
13555         AddFieldToUserObject(ufp, uop);
13556     }
13557 }
13558 
13559 //not testing construction of autodef options object at this time
MakeAutoDefOptionsUserObject(OrganismDescriptionModifiersPtr odmp,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,DeflineFeatureRequestListPtr feature_requests,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand)13560 NLM_EXTERN UserObjectPtr MakeAutoDefOptionsUserObject
13561 (OrganismDescriptionModifiersPtr odmp,
13562  ModifierItemLocalPtr modList,
13563  ValNodePtr modifier_indices,
13564  DeflineFeatureRequestListPtr feature_requests,
13565  Int2 product_flag,
13566  Boolean alternate_splice_flag,
13567  Boolean gene_cluster_opp_strand)
13568 {
13569     UserObjectPtr uop;
13570 
13571     uop = UserObjectNew();
13572     uop->type = ObjectIdNew();
13573     uop->type->str = StringSave(kAutoDefOptions);
13574     uop->_class = StringSave("1.0");
13575 
13576     AddOrganismDescriptionModifiersToAutoDefUserObject(uop, odmp);
13577     if (odmp->use_modifiers) {
13578         AddModListToAutoDefUserObject(uop, modifier_indices);
13579     }
13580     AddDeflineFeatureRequestListToAutoDefUserObject(uop, feature_requests);
13581     if (gene_cluster_opp_strand) {
13582         AddBooleanAutodefField(uop, kGeneClusterOppStrand);
13583     }
13584     if (alternate_splice_flag) {
13585         AddBooleanAutodefField(uop, kAltSpliceFlag);
13586     }
13587     AddAutodefProductFlag(uop, product_flag);
13588 
13589 
13590     return uop;
13591 }
13592 
13593 //not testing construction of autodef options object at this time
RemoveAutoDefObjectCallback(SeqDescPtr sdp,Pointer data)13594 static void RemoveAutoDefObjectCallback(SeqDescPtr sdp, Pointer data)
13595 {
13596     ObjValNodePtr ovp;
13597 
13598     if (sdp != NULL && sdp->extended && sdp->choice == Seq_descr_user &&
13599         IsAutoDefOptions(sdp->data.ptrvalue)) {
13600         ovp = (ObjValNodePtr)sdp;
13601         ovp->idx.deleteme = TRUE;
13602     }
13603 }
13604 
13605 //not testing construction of autodef options object at this time
RemoveAutodefObjects(SeqEntryPtr sep)13606 NLM_EXTERN void RemoveAutodefObjects(SeqEntryPtr sep)
13607 {
13608     VisitDescriptorsInSep(sep, NULL, RemoveAutoDefObjectCallback);
13609     DeleteMarkedObjects(0, OBJ_SEQENTRY, (Pointer)sep);
13610 }
13611 
13612 
13613 //not testing construction of autodef options object at this time
RemoveAutodefObjectsForDesc(SeqDescPtr sdp)13614 NLM_EXTERN void RemoveAutodefObjectsForDesc(SeqDescPtr sdp)
13615 {
13616     ObjValNodePtr ovp;
13617     BioseqPtr bsp;
13618     BioseqSetPtr bssp;
13619     SeqEntryPtr sep;
13620 
13621     if (sdp == NULL || sdp->extended == 0) {
13622         return;
13623     }
13624     ovp = (ObjValNodePtr)sdp;
13625     sep = SeqMgrGetSeqEntryForData(ovp->idx.parentptr);
13626     RemoveAutodefObjects(sep);
13627 }
13628 
13629 //not testing construction of autodef options object at this time
/*
 * Bioseq visitor used by AddAutoDefUserObjectToSeqEntry.
 *
 * data is the UserObjectPtr to attach.  For every Bioseq that is not an
 * amino-acid sequence, a private copy of the user object (made with
 * AsnIoMemCopy) is stored in a newly created user descriptor on the Bioseq.
 *
 * Nothing is added when the copy cannot be made, and the copy is released
 * when the descriptor cannot be created, so no empty descriptor or orphaned
 * copy is left behind.
 */
void AddAutoDefUserObjectCallback(BioseqPtr bsp, Pointer data)
{
    UserObjectPtr uop = (UserObjectPtr)data;
    UserObjectPtr cpy;
    SeqDescPtr sdp;

    if (bsp == NULL || ISA_aa(bsp->mol) || uop == NULL) {
        return;
    }

    cpy = (UserObjectPtr)AsnIoMemCopy(uop,
        (AsnReadFunc)UserObjectAsnRead, (AsnWriteFunc)UserObjectAsnWrite);
    if (cpy == NULL) {
        /* do not create a user descriptor with no content */
        return;
    }

    sdp = CreateNewDescriptorOnBioseq(bsp, Seq_descr_user);
    if (sdp == NULL) {
        /* nowhere to attach the copy; release it instead of leaking it */
        UserObjectFree(cpy);
        return;
    }
    sdp->data.ptrvalue = cpy;
}
13643 
13644 //not testing construction of autodef options object at this time
AddAutoDefUserObjectToSeqEntry(SeqEntryPtr sep,UserObjectPtr uop)13645 NLM_EXTERN void AddAutoDefUserObjectToSeqEntry(SeqEntryPtr sep, UserObjectPtr uop)
13646 {
13647     if (sep == NULL) return;
13648 
13649     RemoveAutodefObjects(sep);
13650     VisitBioseqsInSep(sep, uop, AddAutoDefUserObjectCallback);
13651 }
13652 //LCOV_EXCL_STOP
13653 
13654 NLM_EXTERN void
AutoDefForSeqEntryEx(SeqEntryPtr sep,Uint2 entityID,OrganismDescriptionModifiersPtr odmp,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,DeflineFeatureRequestListPtr feature_requests,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand,Boolean update_options)13655 AutoDefForSeqEntryEx
13656 (SeqEntryPtr sep,
13657 Uint2 entityID,
13658 OrganismDescriptionModifiersPtr odmp,
13659 ModifierItemLocalPtr modList,
13660 ValNodePtr modifier_indices,
13661 DeflineFeatureRequestListPtr feature_requests,
13662 Int2 product_flag,
13663 Boolean alternate_splice_flag,
13664 Boolean gene_cluster_opp_strand,
13665 Boolean update_options)
13666 {
13667 
13668   ValNodePtr defline_clauses = NULL;
13669   UserObjectPtr uop;
13670 
13671   if (sep == NULL) return;
13672 
13673   RemoveNucProtSetTitles (sep);
13674 
13675   SeqEntrySetScope (sep);
13676 
13677 
13678   BuildDefLineFeatClauseList (sep, entityID,
13679                               feature_requests,
13680                               product_flag, alternate_splice_flag,
13681                               gene_cluster_opp_strand,
13682                               &defline_clauses);
13683 
13684 /*  dlfp->feature_requests.suppressed_feature_list = ValNodeFree (dlfp->feature_requests.suppressed_feature_list);                               */
13685 
13686   BuildDefinitionLinesFromFeatureClauseLists (defline_clauses, modList,
13687                                               modifier_indices, odmp);
13688   DefLineFeatClauseListFree (defline_clauses);
13689   ClearProteinTitlesInNucProts (entityID, NULL);
13690   InstantiateProteinTitles (entityID, NULL);
13691 
13692   AddPopsetTitles (sep, feature_requests, product_flag,
13693                    alternate_splice_flag, gene_cluster_opp_strand);
13694 
13695   if (update_options) {
13696       uop = MakeAutoDefOptionsUserObject(odmp, modList, modifier_indices,
13697           feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand);
13698       AddAutoDefUserObjectToSeqEntry(sep, uop);
13699       uop = UserObjectFree(uop);
13700   }
13701 }
13702 
13703 //LCOV_EXCL_START
13704 //not testing construction of autodef options object at this time
13705 NLM_EXTERN void
AutoDefForSeqEntry(SeqEntryPtr sep,Uint2 entityID,OrganismDescriptionModifiersPtr odmp,ModifierItemLocalPtr modList,ValNodePtr modifier_indices,DeflineFeatureRequestListPtr feature_requests,Int2 product_flag,Boolean alternate_splice_flag,Boolean gene_cluster_opp_strand)13706 AutoDefForSeqEntry
13707 (SeqEntryPtr sep,
13708 Uint2 entityID,
13709 OrganismDescriptionModifiersPtr odmp,
13710 ModifierItemLocalPtr modList,
13711 ValNodePtr modifier_indices,
13712 DeflineFeatureRequestListPtr feature_requests,
13713 Int2 product_flag,
13714 Boolean alternate_splice_flag,
13715 Boolean gene_cluster_opp_strand)
13716 {
13717     AutoDefForSeqEntryEx(sep, entityID, odmp, modList, modifier_indices,
13718         feature_requests, product_flag, alternate_splice_flag, gene_cluster_opp_strand, TRUE);
13719 }
13720 //LCOV_EXCL_STOP
13721 
SetBoolFromField(UserFieldPtr field,CharPtr field_name,BoolPtr val)13722 Boolean SetBoolFromField(UserFieldPtr field, CharPtr field_name, BoolPtr val)
13723 {
13724     if (StringICmp(field->label->str, field_name) == 0) {
13725         if (field->choice == 4 && field->data.boolvalue) {
13726             *val = TRUE;
13727         }
13728         return TRUE;
13729     } else {
13730         return FALSE;
13731     }
13732 }
13733 
/*
 * Stores new_val in *val when field has choice 1 and its value, compared as
 * a string, equals match (case-insensitive).  Otherwise *val is unchanged.
 */
void SetDefLineTypeFromFieldString(UserFieldPtr field, CharPtr match, DefLineType new_val, DefLineType PNTR val)
{
    if (field->choice != 1) {
        return;
    }
    if (StringICmp(field->data.ptrvalue, match) != 0) {
        return;
    }
    *val = new_val;
}
13742 
13743 
SetInt4FromFieldString(UserFieldPtr field,CharPtr match,Int4 new_val,Int4Ptr val)13744 Boolean SetInt4FromFieldString(UserFieldPtr field, CharPtr match, Int4 new_val, Int4Ptr val)
13745 {
13746     if (field->choice == 1) {
13747         if (StringICmp(field->data.ptrvalue, match) == 0) {
13748             *val = new_val;
13749             return TRUE;
13750         }
13751     }
13752     return FALSE;
13753 }
13754 
13755 
/*
 * Int2 counterpart of SetInt4FromFieldString without a return value: stores
 * new_val in *val when field has choice 1 and its string value equals match
 * (case-insensitive).
 */
void SetInt2FromFieldString(UserFieldPtr field, CharPtr match, Int2 new_val, Int2Ptr val)
{
    if (field->choice == 1 && StringICmp(field->data.ptrvalue, match) == 0) {
        *val = new_val;
    }
}
13764 
13765 
/*
 * Appends modifier positions to *modifier_indices from a choice-11 field.
 *
 * The field's data is walked as a linked list of sub-fields.  Each labelled
 * sub-field with choice 7 is read as an array of `num` strings; every string
 * is converted with GetDeflinePosForFieldName and the result is appended with
 * ValNodeAddInt (choice 0).
 *
 * Does nothing when modifier_indices or field is NULL, or when the field's
 * choice is not 11.
 */
void SetModifierIndices(ValNodePtr PNTR modifier_indices, UserFieldPtr field)
{
    UserFieldPtr item;
    CharPtr PNTR names;
    Int4 k;
    Int4 pos;

    if (modifier_indices == NULL || field == NULL) {
        return;
    }
    if (field->choice != 11) {
        return;
    }

    item = field->data.ptrvalue;
    while (item != NULL) {
        if (item->label != NULL && item->choice == 7) {
            names = (CharPtr PNTR) item->data.ptrvalue;
            for (k = 0; k < item->num; k++) {
                pos = GetDeflinePosForFieldName(names[k]);
                ValNodeAddInt(modifier_indices, 0, pos);
            }
        }
        item = item->next;
    }
}
13787 
/*
 * Appends one node per feature name in a choice-7 field to
 * *suppressed_features.
 *
 * The field's data is read as an array of `num` strings.  Each name is mapped
 * through GetFeatureTypeByName and GetFeatdefFromFeatureType; the result,
 * narrowed to Uint1, becomes the node's choice and the node's pointer is NULL.
 *
 * Does nothing when suppressed_features or field is NULL, or when the field's
 * choice is not 7.
 */
void SetSuppressedFeatures(ValNodePtr PNTR suppressed_features, UserFieldPtr field)
{
    CharPtr PNTR names;
    Uint1 featdef;
    Int4 k;

    if (suppressed_features == NULL || field == NULL) {
        return;
    }
    if (field->choice != 7) {
        return;
    }

    names = (CharPtr PNTR) field->data.ptrvalue;
    k = 0;
    while (k < field->num) {
        featdef = (Uint1)GetFeatdefFromFeatureType(GetFeatureTypeByName(names[k]));
        ValNodeAddPointer(suppressed_features, featdef, NULL);
        k++;
    }
}
13803 
RegenerateAutoDef(BioseqPtr bsp)13804 NLM_EXTERN void RegenerateAutoDef(BioseqPtr bsp)
13805 {
13806     SeqMgrDescContext context;
13807     SeqDescPtr sdp;
13808     SeqEntryPtr sep;
13809     UserObjectPtr uop;
13810     OrganismDescriptionModifiers odm;
13811     ModifierItemLocalPtr modlist;
13812     ValNodePtr modifier_indices = NULL;
13813     DeflineFeatureRequestList feature_request;
13814     Int2 product_flag = DEFAULT_ORGANELLE_CLAUSE;
13815     Boolean alternate_splice_flag = FALSE;
13816     Boolean gene_cluster_opp_strand = FALSE;
13817     UserFieldPtr field;
13818     Int4 index;
13819 
13820     if (bsp == NULL || ISA_aa(bsp->mol)){
13821         return;
13822     }
13823     sep = SeqMgrGetSeqEntryForData(bsp);
13824     if (sep == NULL) {
13825         return;
13826     }
13827 
13828     sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_user, &context);
13829     while (sdp != NULL && !IsAutoDefOptions(sdp->data.ptrvalue)) {
13830         sdp = SeqMgrGetNextDescriptor(bsp, sdp, Seq_descr_user, &context);
13831     }
13832     if (sdp == NULL) {
13833         return;
13834     }
13835 
13836     uop = sdp->data.ptrvalue;
13837 
13838     MemSet(&odm, 0, sizeof(OrganismDescriptionModifiers));
13839     modlist = MemNew(NumDefLineModifiers() * sizeof(ModifierItemLocalData));
13840     InitFeatureRequests(&feature_request);
13841 
13842     for (field = uop->data; field != NULL; field = field->next) {
13843         if (field->label != NULL) {
13844             /* organism */
13845             if (SetBoolFromField(field, kDoNotApplyToAff, &(odm.exclude_aff))) {
13846             } else if (SetBoolFromField(field, kDoNotApplyToCf, &(odm.exclude_cf))) {
13847             } else if (SetBoolFromField(field, kDoNotApplyToNr, &(odm.exclude_nr))) {
13848             } else if (SetBoolFromField(field, kDoNotApplyToCf, &(odm.exclude_cf))) {
13849             } else if (SetBoolFromField(field, kDoNotApplyToSp, &(odm.exclude_sp))) {
13850             } else if (SetBoolFromField(field, kGeneClusterOppStrand, &gene_cluster_opp_strand)) {
13851             } else if (SetBoolFromField(field, kIncludeCountryText, &(odm.include_country_extra))) {
13852             } else if (SetBoolFromField(field, kKeepAfterSemicolon, &(odm.allow_semicolon_in_modifier))) {
13853             } else if (SetBoolFromField(field, kLeaveParenthetical, &(odm.keep_paren))) {
13854             } else if (SetBoolFromField(field, kUseLabels, &(odm.use_labels))) {
13855             } else if (SetBoolFromField(field, kAllowModAtEndOfTaxname, &(odm.allow_mod_at_end_of_taxname))) {
13856             } else if (StringICmp(field->label->str, kHIVRule) == 0) {
13857                 if (SetInt4FromFieldString(field, kPreferClone, clone_isolate_HIV_rule_prefer_clone, &(odm.clone_isolate_HIV_rule_num)) ||
13858                     SetInt4FromFieldString(field, kPreferIsolate, clone_isolate_HIV_rule_prefer_isolate, &(odm.clone_isolate_HIV_rule_num)) ||
13859                     SetInt4FromFieldString(field, kWantBoth, clone_isolate_HIV_rule_want_both, &(odm.clone_isolate_HIV_rule_num))) {
13860                     odm.use_modifiers = TRUE;
13861                 }
13862             } else if (StringICmp(field->label->str, kModifierList) == 0) {
13863                 SetModifierIndices(&modifier_indices, field);
13864             /* features */
13865             } else if (SetBoolFromField(field, kAltSpliceFlag, &(alternate_splice_flag))) {
13866             } else if (SetBoolFromField(field, kKeep3UTRs, &(feature_request.keep_items[Removable3UTR]))) {
13867             } else if (SetBoolFromField(field, kKeep5UTRs, &(feature_request.keep_items[Removable5UTR]))) {
13868             } else if (SetBoolFromField(field, kKeepExons, &(feature_request.keep_items[RemovableExon]))) {
13869             } else if (SetBoolFromField(field, kKeepIntrons, &(feature_request.keep_items[RemovableIntron]))) {
13870             } else if (SetBoolFromField(field, kKeepLTRs, &(feature_request.keep_items[RemovableLTR]))) {
13871             } else if (SetBoolFromField(field, kKeepPromoters, &(feature_request.keep_items[RemovablePromoter]))) {
13872             } else if (SetBoolFromField(field, kKeepuOrf, &(feature_request.keep_items[RemovableuORF]))) {
13873             } else if (SetBoolFromField(field, kKeepMobileElement, &(feature_request.keep_items[RemovableMobileElement]))) {
13874             } else if (SetBoolFromField(field, kKeepNoncodingProductFeat, &(feature_request.keep_items[RemovableNoncodingProductFeat]))) {
13875             } else if (SetBoolFromField(field, kKeepPrecursorRNA, &(feature_request.keep_items[RemovablePrecursorRNA]))) {
13876             } else if (SetBoolFromField(field, kKeepncRNA, &(feature_request.keep_items[RemovablencRNA]))) {
13877             } else if (SetBoolFromField(field, kKeepRepeatRegion, &(feature_request.keep_items[RemovableRepeatRegion]))) {
13878             } else if (SetBoolFromField(field, kUseFakePromoters, &(feature_request.add_fake_promoters))) {
13879             } else if (SetBoolFromField(field, kSuppressFeatureAltSplice, &(feature_request.suppress_alt_splice_phrase))) {
13880             } else if (SetBoolFromField(field, kSuppressLocusTags, &(feature_request.suppress_locus_tags))) {
13881             } else if (SetBoolFromField(field, kSuppressMobileElementSubfeatures, &(feature_request.remove_subfeatures))) {
13882             } else if (SetBoolFromField(field, kUseNcRNAComment, &(feature_request.use_ncrna_note))) {
13883             } else if (SetBoolFromField(field, kSuppressAllele, &(feature_request.suppress_allele))) {
13884             } else if (StringICmp(field->label->str, kSpecifyNuclearProduct) == 0) {
13885                 if (field->choice == 4 && field->data.boolvalue) {
13886                     product_flag = DEFAULT_ORGANELLE_CLAUSE;
13887                 }
13888             } else if (StringICmp(field->label->str, kMaxMods) == 0) {
13889                 if (field->choice == 2) {
13890                     odm.max_mods = field->data.intvalue;
13891                 }
13892             } else if (StringICmp(field->label->str, kFeatureListType) == 0) {
13893                 SetDefLineTypeFromFieldString(field, kCompleteGenome, DEFLINE_COMPLETE_GENOME, &(feature_request.feature_list_type));
13894                 SetDefLineTypeFromFieldString(field, kCompleteSequence, DEFLINE_COMPLETE_SEQUENCE, &(feature_request.feature_list_type));
13895                 SetDefLineTypeFromFieldString(field, kPartialGenome, DEFLINE_PARTIAL_GENOME, &(feature_request.feature_list_type));
13896                 SetDefLineTypeFromFieldString(field, kPartialSequence, DEFLINE_PARTIAL_SEQUENCE, &(feature_request.feature_list_type));
13897                 SetDefLineTypeFromFieldString(field, kSequence, DEFLINE_SEQUENCE, &(feature_request.feature_list_type));
13898                 SetDefLineTypeFromFieldString(field, kListAllFeatures, DEFLINE_USE_FEATURES, &(feature_request.feature_list_type));
13899             } else if (StringICmp(field->label->str, kMiscFeatRule) == 0) {
13900                 SetInt4FromFieldString(field, kCommentFeat, 1, &(feature_request.misc_feat_parse_rule));
13901                 SetInt4FromFieldString(field, kDelete, 2, &(feature_request.misc_feat_parse_rule));
13902                 SetInt4FromFieldString(field, kNoncodingProductFeat, 3, &(feature_request.misc_feat_parse_rule));
13903             } else if (StringICmp(field->label->str, kProductFlag) == 0) {
13904                 SetInt2FromFieldString(field, "mitochondrial", 1, &product_flag);
13905                 SetInt2FromFieldString(field, "chloroplast", 2, &product_flag);
13906                 SetInt2FromFieldString(field, "kinetoplast", 3, &product_flag);
13907                 SetInt2FromFieldString(field, "plastid", 4, &product_flag);
13908                 SetInt2FromFieldString(field, "chromoplast", 5, &product_flag);
13909                 SetInt2FromFieldString(field, "cyanelle", 6, &product_flag);
13910                 SetInt2FromFieldString(field, "apicoplast", 7, &product_flag);
13911                 SetInt2FromFieldString(field, "leucoplast", 8, &product_flag);
13912                 SetInt2FromFieldString(field, "proplastid", 9, &product_flag);
13913             } else if (StringICmp(field->label->str, kSuppressedFeatures) == 0) {
13914                 SetSuppressedFeatures(&(feature_request.suppressed_feature_list), field);
13915             }
13916         }
13917     }
13918     if (modifier_indices != NULL) {
13919         odm.use_modifiers = TRUE;
13920     }
13921 
13922     AutoDefForSeqEntryEx(sep, bsp->idx.entityID, &odm, modlist, modifier_indices,
13923         &feature_request, product_flag, alternate_splice_flag,
13924         gene_cluster_opp_strand, FALSE);
13925 
13926     /* cleanup */
13927     if (modlist != NULL) {
13928         for (index = 0; index < NumDefLineModifiers(); index++) {
13929             ValNodeFree(modlist[index].values_seen);
13930         }
13931         MemFree(modlist);
13932     }
13933 
13934     modifier_indices = ValNodeFree(modifier_indices);
13935 
13936 }
13937 
13938 
13939 //LCOV_EXCL_START
13940 //Not part of Autodef or Cleanup
DoTbl2AsnAutoDef(SeqEntryPtr sep,Uint2 entityID)13941 NLM_EXTERN void DoTbl2AsnAutoDef(SeqEntryPtr sep, Uint2 entityID)
13942 
13943 {
13944     ValNodePtr                    defline_clauses = NULL;
13945     DeflineFeatureRequestList     feature_requests;
13946     size_t                        index;
13947     ValNodePtr                    modifier_indices = NULL;
13948     ModifierItemLocalPtr          modList;
13949     OrganismDescriptionModifiers  odmp;
13950     SeqEntryPtr                   oldscope;
13951 
13952     if (sep == NULL) return;
13953     if (entityID < 1) return;
13954 
13955     modList = MemNew(NumDefLineModifiers() * sizeof(ModifierItemLocalData));
13956     if (modList == NULL) return;
13957 
13958     InitFeatureRequests(&feature_requests);
13959 
13960     SetRequiredModifiers(modList);
13961     CountModifiers(modList, sep);
13962 
13963     InitOrganismDescriptionModifiers(&odmp, sep);
13964 
13965     RemoveNucProtSetTitles(sep);
13966     oldscope = SeqEntrySetScope(sep);
13967 
13968     BuildDefLineFeatClauseList(sep, entityID, &feature_requests,
13969         DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE,
13970         &defline_clauses);
13971     if (AreFeatureClausesUnique(defline_clauses)) {
13972         modifier_indices = GetModifierIndicesFromModList(modList);
13973     }
13974     else {
13975         modifier_indices = FindBestModifiers(sep, modList);
13976     }
13977 
13978     BuildDefinitionLinesFromFeatureClauseLists(defline_clauses, modList,
13979         modifier_indices, &odmp);
13980     DefLineFeatClauseListFree(defline_clauses);
13981     if (modList != NULL) {
13982         for (index = 0; index < NumDefLineModifiers(); index++) {
13983             ValNodeFree(modList[index].values_seen);
13984         }
13985         MemFree(modList);
13986     }
13987     modifier_indices = ValNodeFree(modifier_indices);
13988 
13989     ClearProteinTitlesInNucProts(entityID, NULL);
13990     InstantiateProteinTitles(entityID, NULL);
13991     /*
13992     RemovePopsetTitles (sep);
13993     */
13994     AddPopsetTitles(sep, &feature_requests, DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE);
13995 
13996     SeqEntrySetScope(oldscope);
13997 }
13998 
13999 
14000 /* Retro PopSet Title Functions */
14001 //Not part of Autodef or Cleanup (no more popset retros)
BioseqHasTitleOrNucProtSetHasTitle(BioseqPtr bsp)14002 static SeqDescPtr BioseqHasTitleOrNucProtSetHasTitle (BioseqPtr bsp)
14003 {
14004   SeqDescPtr sdp = NULL;
14005   SeqMgrDescContext context;
14006   BioseqSetPtr bssp;
14007   ObjValNodePtr ovn;
14008 
14009   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
14010   while (sdp != NULL) {
14011     if (sdp->extended) {
14012       ovn = (ObjValNodePtr) sdp;
14013       if (ovn->idx.parentptr == bsp) {
14014         return sdp;
14015       } else if (ovn->idx.parenttype == OBJ_BIOSEQSET
14016                  && (bssp = (BioseqSetPtr) ovn->idx.parentptr) != NULL
14017                  && bssp->_class == BioseqseqSet_class_nuc_prot) {
14018         return sdp;
14019       }
14020     }
14021     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &context);
14022   }
14023   return NULL;
14024 }
14025 
14026 
14027 //Not part of Autodef or Cleanup (no more popset retros)
FindFirstNucBioseqWithTitle(SeqEntryPtr sep)14028 static BioseqPtr FindFirstNucBioseqWithTitle (SeqEntryPtr sep)
14029 
14030 {
14031   BioseqPtr     bsp;
14032   BioseqSetPtr  bssp;
14033 
14034   if (sep == NULL || sep->data.ptrvalue == NULL ||
14035       /* sep->choice < 0 || */ sep->choice > 2) return NULL;
14036   if (IS_Bioseq (sep) ) {
14037     bsp = (BioseqPtr) sep->data.ptrvalue;
14038     if (!ISA_aa (bsp->mol) && BioseqHasTitleOrNucProtSetHasTitle(bsp) != NULL) {
14039       return bsp;
14040     } else {
14041       return NULL;
14042     }
14043   }
14044   bssp = (BioseqSetPtr) sep->data.ptrvalue;
14045   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
14046     bsp = FindFirstNucBioseqWithTitle (sep);
14047     if (bsp != NULL) return bsp;
14048   }
14049   return NULL;
14050 }
14051 
14052 
14053 //Not part of Autodef or Cleanup (no more popset retros)
/*
 * Bioseq-set visitor used by PopSetAutoDefRetro.
 *
 * Only sets whose class passes GetsDocsumTitle and that have no title yet are
 * processed.  The first nucleotide Bioseq with a title is used to infer
 * clause options via MakeFeatureRequestsMatchExpectedTitle:
 *   - if options are inferred, AddPopsetTitles is run on the top Seq-entry of
 *     the entity and stat->feature_clause is incremented;
 *   - otherwise, a common feature clause is tried and, when found, added with
 *     AddPopsetDeflineWithClause and stat->common_title is incremented;
 *   - when neither works, or no titled nucleotide Bioseq exists,
 *     stat->uncalculatable is incremented.
 * data is an optional PopSetRetroStatPtr; statistics are skipped when NULL.
 */
static void RetroPopSetAutoDefCallback (BioseqSetPtr bssp, Pointer data)
{
  PopSetRetroStatPtr      stat = (PopSetRetroStatPtr) data;
  DefLineClauseOptionsPtr opts;
  SeqEntryPtr             set_sep;
  SeqEntryPtr             top_sep;
  SeqEntryPtr             saved_scope;
  BioseqPtr               titled_bsp;
  CharPtr                 shared_clause;

  if (bssp == NULL) {
    return;
  }
  if (!GetsDocsumTitle (bssp->_class) || HasTitle (bssp->descr)) {
    return;
  }

  set_sep = SeqMgrGetSeqEntryForData (bssp);
  titled_bsp = FindFirstNucBioseqWithTitle (set_sep);
  if (titled_bsp == NULL) {
    if (stat != NULL) {
      stat->uncalculatable++;
    }
    return;
  }
  top_sep = GetTopSeqEntryForEntityID (bssp->idx.entityID);

  /* infer feature list from first title */
  opts = MakeFeatureRequestsMatchExpectedTitle (titled_bsp);
  if (opts != NULL) {
    saved_scope = SeqEntrySetScope (top_sep);
    AddPopsetTitles (top_sep, &(opts->feature_requests),
                     opts->product_flag,
                     opts->alternate_splice_flag,
                     opts->gene_cluster_opp_strand);
    SeqEntrySetScope (saved_scope);
    DefLineClauseOptionsFree (opts);
    if (stat != NULL) {
      stat->feature_clause++;
      stat->title_added = TRUE;
    }
    return;
  }

  /* no options could be inferred: fall back to a clause shared by the set */
  shared_clause = GetCommonFeatureClause (set_sep);
  if (shared_clause == NULL) {
    if (stat != NULL) {
      stat->uncalculatable++;
    }
    return;
  }

  AddPopsetDeflineWithClause (bssp, shared_clause);
  MemFree (shared_clause);
  if (stat != NULL) {
    stat->common_title++;
    stat->title_added = TRUE;
  }
}
14110 
14111 
14112 //Not part of Autodef or Cleanup (no more popset retros)
PopSetAutoDefRetro(SeqEntryPtr sep,PopSetRetroStatPtr stat)14113 NLM_EXTERN void PopSetAutoDefRetro (SeqEntryPtr sep, PopSetRetroStatPtr stat)
14114 
14115 {
14116   SeqEntryPtr oldscope;
14117 
14118   if (sep == NULL) return;
14119 
14120   oldscope = SeqEntrySetScope (sep);
14121 
14122   VisitSetsInSep (sep, stat, RetroPopSetAutoDefCallback);
14123 
14124   SeqEntrySetScope (oldscope);
14125 }
14126 
14127 
14128 //Not used for Autodef or Cleanup
14129 /* functions for editing seq-locs */
ExtendSeqLocToEnd(SeqLocPtr slp,BioseqPtr bsp,Boolean end5)14130 NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5)
14131 {
14132   Uint1          strand;
14133   SeqLocPtr      slp_to_change, slp_index;
14134   Int4           extent_to_change;
14135   Int4           start, stop;
14136   SeqIdPtr       sip;
14137   Int4           start_diff = 0;
14138 
14139   if (slp == NULL || bsp == NULL) return 0;
14140 
14141   slp_to_change = NULL;
14142   strand = SeqLocStrand (slp);
14143   switch (slp->choice)
14144   {
14145     case SEQLOC_INT:
14146       slp_to_change = slp;
14147       break;
14148     case SEQLOC_MIX:
14149       case SEQLOC_PACKED_INT:
14150       sip = SeqLocId (slp);
14151       if (sip == NULL) return 0; /* can only process if all on one bioseq */
14152       slp_to_change = NULL;
14153       if ((strand == Seq_strand_minus && end5)
14154         || (strand != Seq_strand_minus && !end5))
14155       {
14156         extent_to_change = 0;
14157         for (slp_index = (SeqLocPtr)slp->data.ptrvalue; slp_index != NULL; slp_index = slp_index->next)
14158         {
14159           stop = GetOffsetInBioseq (slp_index, bsp, SEQLOC_STOP);
14160           if (stop > extent_to_change)
14161           {
14162             slp_to_change = slp_index;
14163             extent_to_change = stop;
14164           }
14165         }
14166       }
14167       else
14168       {
14169         extent_to_change = bsp->length;
14170         for (slp_index = (SeqLocPtr)slp->data.ptrvalue; slp_index != NULL; slp_index = slp_index->next)
14171         {
14172           start = GetOffsetInBioseq (slp_index, bsp, SEQLOC_START);
14173           if (start < extent_to_change)
14174           {
14175             slp_to_change = slp_index;
14176             extent_to_change = start;
14177           }
14178         }
14179       }
14180       break;
14181   }
14182 
14183   if (slp_to_change != NULL)
14184   {
14185     if ((strand == Seq_strand_minus && end5)
14186       || (strand != Seq_strand_minus && !end5))
14187     {
14188       start = GetOffsetInBioseq (slp_to_change, bsp, SEQLOC_START);
14189       stop = bsp->length - 1;
14190     }
14191     else
14192     {
14193       start = 0;
14194       stop = GetOffsetInBioseq (slp_to_change, bsp, SEQLOC_STOP);
14195     }
14196     if (end5) {
14197         if (strand == Seq_strand_minus) {
14198             start_diff = bsp->length - 1 - GetOffsetInBioseq(slp_to_change, bsp, SEQLOC_START);
14199         } else {
14200             start_diff = GetOffsetInBioseq(slp_to_change, bsp, SEQLOC_START);
14201         }
14202     }
14203 
14204     expand_seq_loc (start, stop, strand, slp_to_change);
14205   }
14206   return start_diff;
14207 }
14208 
14209 /* functions for feature conversion.  shared by sequin5 and macroapi */
14210 //Not used for Autodef or Cleanup
IsBioseqSetInGPS(BioseqSetPtr bssp)14211 NLM_EXTERN Boolean IsBioseqSetInGPS (BioseqSetPtr bssp)
14212 {
14213   if (bssp == NULL) return FALSE;
14214   if (bssp->_class == BioseqseqSet_class_gen_prod_set) return TRUE;
14215   if (bssp->idx.parentptr == NULL || bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
14216   return IsBioseqSetInGPS ((BioseqSetPtr) bssp->idx.parentptr);
14217 }
14218 
14219 //Not used for Autodef or Cleanup
IsBioseqInGPS(BioseqPtr bsp)14220 NLM_EXTERN Boolean IsBioseqInGPS (BioseqPtr bsp)
14221 {
14222   if (bsp == NULL || bsp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET)
14223   {
14224     return FALSE;
14225   }
14226   else
14227   {
14228     return IsBioseqSetInGPS ((BioseqSetPtr) bsp->idx.parentptr);
14229   }
14230 }
14231 
14232 //Not used for Autodef or Cleanup
IsFeatInGPS(SeqFeatPtr sfp)14233 NLM_EXTERN Boolean IsFeatInGPS (SeqFeatPtr sfp)
14234 {
14235   if (sfp == NULL) return FALSE;
14236   return IsBioseqInGPS (BioseqFindFromSeqLoc (sfp->location));
14237 }
14238 
14239 //Not used for Autodef or Cleanup
RnaRefFromLabel(Uint2 featdef_to,CharPtr label,BoolPtr add_label_to_comment)14240 NLM_EXTERN RnaRefPtr RnaRefFromLabel (Uint2 featdef_to, CharPtr label, BoolPtr add_label_to_comment)
14241 {
14242   RnaRefPtr rrp;
14243   tRNAPtr   trp = NULL;
14244   Boolean   just_trna_text;
14245   Int4      j;
14246 
14247   rrp = RnaRefNew ();
14248   if (NULL == rrp)
14249     return NULL;
14250 
14251   *add_label_to_comment = FALSE;
14252 
14253   switch (featdef_to)
14254   {
14255     case FEATDEF_preRNA :
14256       rrp->type = 1;
14257       break;
14258     case FEATDEF_mRNA :
14259       rrp->type = 2;
14260       break;
14261     case FEATDEF_tRNA :
14262       rrp->type = 3;
14263       break;
14264     case FEATDEF_rRNA :
14265       rrp->type = 4;
14266       break;
14267     case FEATDEF_snRNA :
14268       rrp->type = 8;
14269       break;
14270     case FEATDEF_scRNA :
14271       rrp->type = 8;
14272       break;
14273     case FEATDEF_snoRNA :
14274       rrp->type = 8;
14275       break;
14276     case FEATDEF_ncRNA:
14277       rrp->type = 8;
14278       break;
14279     case FEATDEF_tmRNA:
14280       rrp->type = 9;
14281       break;
14282     case FEATDEF_misc_RNA:
14283       rrp->type = 10;
14284       break;
14285     case FEATDEF_otherRNA :
14286       rrp->type = 255;
14287       break;
14288     default :
14289       break;
14290   }
14291 
14292   if (featdef_to == FEATDEF_tRNA)
14293   {
14294     trp = (tRNAPtr) MemNew (sizeof (tRNA));
14295     rrp->ext.choice = 2;
14296     rrp->ext.value.ptrvalue = (Pointer) trp;
14297     trp->aa = ParseTRnaString (label, &just_trna_text, NULL, FALSE);
14298     trp->aatype = 2;
14299     for (j = 0; j < 6; j++) {
14300         trp->codon [j] = 255;
14301     }
14302     if (!just_trna_text)
14303     {
14304       *add_label_to_comment = TRUE;
14305     }
14306   }
14307   else
14308   {
14309     SetRNARefProductString (rrp, NULL, label, ExistingTextOption_replace_old);
14310   }
14311   return rrp;
14312 }
14313 
14314 //Not used for Autodef or Cleanup
ConvertProtToProtFunc(SeqFeatPtr sfp,Uint2 featdef_to)14315 NLM_EXTERN Boolean ConvertProtToProtFunc
14316 (SeqFeatPtr sfp,
14317  Uint2      featdef_to)
14318 {
14319   ProtRefPtr prp;
14320 
14321   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
14322   if (NULL == prp)
14323     return FALSE;
14324 
14325   switch (featdef_to) {
14326     case FEATDEF_PROT :
14327       prp->processed = 0;
14328       break;
14329     case FEATDEF_preprotein :
14330       prp->processed = 1;
14331       break;
14332     case FEATDEF_mat_peptide_aa :
14333       prp->processed = 2;
14334       break;
14335     case FEATDEF_sig_peptide_aa :
14336       prp->processed = 3;
14337       break;
14338     case FEATDEF_transit_peptide_aa :
14339       prp->processed = 4;
14340       break;
14341     case FEATDEF_propeptide_aa :
14342       prp->processed = 5;
14343       break;
14344     default :
14345       break;
14346   }
14347   return TRUE;
14348 }
14349 
14350 //Not used for Autodef or Cleanup
14351 NLM_EXTERN void
ApplyCDSOptionsToFeature(SeqFeatPtr sfp,Boolean remove_mRNA,Boolean remove_gene,Boolean remove_transcript_id,Boolean keep_original)14352 ApplyCDSOptionsToFeature
14353 (SeqFeatPtr sfp,
14354  Boolean remove_mRNA,
14355  Boolean remove_gene,
14356  Boolean remove_transcript_id,
14357  Boolean keep_original)
14358 {
14359   BioseqPtr         product_bsp;
14360   SeqFeatPtr        gene, mrna;
14361   SeqMgrFeatContext fcontext;
14362 
14363   if (sfp == NULL) return;
14364 
14365   if (sfp->product != NULL) {
14366     product_bsp = BioseqFindFromSeqLoc (sfp->product);
14367     if (product_bsp != NULL && !keep_original)
14368     {
14369       product_bsp->idx.deleteme = TRUE;
14370     }
14371     if (!IsFeatInGPS (sfp) || remove_transcript_id)
14372     {
14373       sfp->product = SeqLocFree (sfp->product);
14374     }
14375   }
14376 
14377   if (remove_gene)
14378   {
14379     gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
14380     if (gene != NULL)
14381     {
14382       gene->idx.deleteme = TRUE;
14383     }
14384   }
14385 
14386   if (remove_mRNA)
14387   {
14388     mrna = SeqMgrGetOverlappingmRNA (sfp->location, &fcontext);
14389     if (mrna != NULL)
14390     {
14391       mrna->idx.deleteme = TRUE;
14392     }
14393   }
14394 
14395 }
14396 
14397 //Not used for Autodef or Cleanup
14398 NLM_EXTERN Boolean
ConvertCDSToRNA(SeqFeatPtr sfp,Uint2 rna_type)14399 ConvertCDSToRNA
14400 (SeqFeatPtr  sfp,
14401  Uint2       rna_type)
14402 {
14403   Char                   label [256];
14404   CharPtr                new_comment;
14405   Int4                   comment_len = 0;
14406   Boolean                add_label_to_comment = FALSE;
14407 
14408   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;
14409 
14410   FeatDefLabel (sfp, label, sizeof (label), OM_LABEL_CONTENT);
14411 
14412   sfp->data.value.ptrvalue =
14413       CdRegionFree ((CdRegionPtr) sfp->data.value.ptrvalue);
14414 
14415   sfp->data.choice = SEQFEAT_RNA;
14416   sfp->data.value.ptrvalue = RnaRefFromLabel (rna_type, label, &add_label_to_comment);
14417 
14418   if (add_label_to_comment && StringCmp (label, sfp->comment) != 0)
14419   {
14420     if (StringHasNoText (sfp->comment))
14421     {
14422       new_comment = StringSave (label);
14423     }
14424     else
14425     {
14426       comment_len = StringLen (sfp->comment) + StringLen (label) + 3;
14427       new_comment = (CharPtr) MemNew (sizeof (Char) * comment_len);
14428       sprintf (new_comment, "%s; %s", sfp->comment, label);
14429     }
14430     sfp->comment = MemFree (sfp->comment);
14431     sfp->comment = new_comment;
14432   }
14433   /* change subtype so that feature will be reindexed */
14434   sfp->idx.subtype = 0;
14435 
14436   return TRUE;
14437 }
14438 
14439 //Not used for Autodef or Cleanup
ConvertGeneToRNA(SeqFeatPtr sfp,Uint2 featdef_to)14440 NLM_EXTERN Boolean ConvertGeneToRNA (SeqFeatPtr sfp, Uint2 featdef_to)
14441 {
14442   Char                   label [256];
14443   GeneRefPtr grp;
14444   Boolean    add_label_to_comment = FALSE;
14445   CharPtr    new_comment;
14446   Int4       comment_len = 0;
14447 
14448   if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return FALSE;
14449 
14450   FeatDefLabel (sfp, label, sizeof (label), OM_LABEL_CONTENT);
14451 
14452   grp = (GeneRefPtr) sfp->data.value.ptrvalue;
14453 
14454   sfp->data.choice = SEQFEAT_RNA;
14455   sfp->data.value.ptrvalue = RnaRefFromLabel (featdef_to, label, &add_label_to_comment);
14456 
14457   if (add_label_to_comment)
14458   {
14459     comment_len += StringLen (label) + 2;
14460   }
14461   if (grp != NULL)
14462   {
14463     if (!StringHasNoText (grp->locus) && StringCmp (grp->locus, label) != 0)
14464     {
14465       comment_len += StringLen (grp->locus) + 2;
14466     }
14467     if (!StringHasNoText (grp->allele) && StringCmp (grp->allele, label) != 0)
14468     {
14469       comment_len += StringLen (grp->allele) + 2;
14470     }
14471     if (!StringHasNoText (grp->desc) && StringCmp (grp->desc, label) != 0)
14472     {
14473       comment_len += StringLen (grp->desc) + 2;
14474     }
14475     if (!StringHasNoText (grp->maploc) && StringCmp (grp->maploc, label) != 0)
14476     {
14477       comment_len += StringLen (grp->maploc) + 2;
14478     }
14479     if (!StringHasNoText (grp->locus_tag) && StringCmp (grp->locus_tag, label) != 0)
14480     {
14481       comment_len += StringLen (grp->locus_tag) + 2;
14482     }
14483   }
14484   if (comment_len > 0)
14485   {
14486     comment_len += StringLen (sfp->comment) + 3;
14487     new_comment = (CharPtr) MemNew (sizeof (Char) * comment_len);
14488     if (!StringHasNoText (sfp->comment))
14489     {
14490       StringCat (new_comment, sfp->comment);
14491       StringCat (new_comment, "; ");
14492     }
14493     if (add_label_to_comment)
14494     {
14495       StringCat (new_comment, label);
14496       StringCat (new_comment, "; ");
14497     }
14498     /* append unused gene qualifiers */
14499     if (grp != NULL)
14500     {
14501       if (!StringHasNoText (grp->locus) && StringCmp (grp->locus, label) != 0)
14502       {
14503         StringCat (new_comment, grp->locus);
14504         StringCat (new_comment, "; ");
14505       }
14506       if (!StringHasNoText (grp->allele) && StringCmp (grp->allele, label) != 0)
14507       {
14508         StringCat (new_comment, grp->allele);
14509         StringCat (new_comment, "; ");
14510       }
14511       if (!StringHasNoText (grp->desc) && StringCmp (grp->desc, label) != 0)
14512       {
14513         StringCat (new_comment, grp->desc);
14514         StringCat (new_comment, "; ");
14515       }
14516       if (!StringHasNoText (grp->maploc) && StringCmp (grp->maploc, label) != 0)
14517       {
14518         StringCat (new_comment, grp->maploc);
14519         StringCat (new_comment, "; ");
14520       }
14521       if (!StringHasNoText (grp->locus_tag) && StringCmp (grp->locus_tag, label) != 0)
14522       {
14523         StringCat (new_comment, grp->locus_tag);
14524         StringCat (new_comment, "; ");
14525       }
14526     }
14527     /* remove last semicolon */
14528     new_comment[StringLen (new_comment) - 2] = 0;
14529     sfp->comment = MemFree (sfp->comment);
14530     sfp->comment = new_comment;
14531 
14532   }
14533 
14534   /* free generef */
14535   grp = GeneRefFree (grp);
14536 
14537   return TRUE;
14538 }
14539 
14540 
14541 /* These functions are used for converting features on nucleotide sequences to
14542  * features on protein sequences */
14543 //Not used for Autodef or Cleanup
14544 /* copied from seqport.c, for the benefit of load_fuzz_to_DNA */
add_fuzziness_to_loc(SeqLocPtr slp,Boolean less)14545 static Boolean add_fuzziness_to_loc (SeqLocPtr slp, Boolean less)
14546 {
14547     IntFuzzPtr ifp;
14548     SeqIntPtr sint;
14549     SeqPntPtr spnt;
14550 
14551     sint = NULL;
14552     spnt = NULL;
14553 
14554     if(slp->choice == SEQLOC_INT)
14555         sint = (SeqIntPtr) slp->data.ptrvalue;
14556     else
14557     {
14558         if(slp->choice == SEQLOC_PNT)
14559             spnt = (SeqPntPtr) slp->data.ptrvalue;
14560         else
14561             return FALSE;
14562     }
14563     ifp = IntFuzzNew();
14564     ifp->choice = 4;
14565     ifp->a = less ? 2 : 1;
14566 
14567     if(spnt != NULL)
14568         spnt->fuzz = ifp;
14569     else if (sint != NULL)
14570     {
14571         if(less)
14572             sint->if_from = ifp;
14573         else
14574             sint->if_to = ifp;
14575     }
14576 
14577     return TRUE;
14578 }
14579 
14580 //Not used for Autodef or Cleanup
14581 /* copied from seqport.c, for the benefit of MYdnaLoc_to_aaLoc */
load_fuzz_to_DNA(SeqLocPtr dnaLoc,SeqLocPtr aaLoc,Boolean first)14582 static Boolean load_fuzz_to_DNA(SeqLocPtr dnaLoc, SeqLocPtr aaLoc, Boolean
14583 first)
14584 {
14585     Uint1 strand;
14586     SeqPntPtr spnt;
14587     SeqIntPtr sint;
14588     IntFuzzPtr ifp;
14589     Boolean load, less;
14590 
14591     load = FALSE;
14592     strand = SeqLocStrand(aaLoc);
14593     if(aaLoc->choice == SEQLOC_INT)
14594     {
14595         sint = (SeqIntPtr) aaLoc->data.ptrvalue;
14596         if((first && strand != Seq_strand_minus ) ||
14597             (!first && strand == Seq_strand_minus))    /*the first
14598 Seq-loc*/
14599         {
14600             ifp = sint->if_from;
14601             if(ifp && ifp->choice == 4 )
14602                 load = (ifp->a == 2);
14603         }
14604         else
14605         {
14606             ifp = sint->if_to;
14607             if(ifp && ifp->choice == 4)
14608                 load = (ifp->a == 1);
14609         }
14610     }
14611     else if(aaLoc->choice == SEQLOC_PNT)
14612     {
14613         spnt = (SeqPntPtr) aaLoc->data.ptrvalue;
14614         ifp = spnt->fuzz;
14615         if(ifp && ifp->choice == 4)
14616         {
14617             if(first)
14618                 load = (ifp->a == 2);
14619             else
14620                 load = (ifp->a == 1);
14621         }
14622     }
14623 
14624     if(load)
14625     {
14626         if(SeqLocStrand(dnaLoc) == Seq_strand_minus)
14627             less = (first == FALSE);
14628         else
14629             less = first;
14630         add_fuzziness_to_loc (dnaLoc, less);
14631         return TRUE;
14632     }
14633     else
14634         return FALSE;
14635 }
14636 
14637 //Not used for Autodef or Cleanup
MYdnaLoc_to_aaLoc(SeqFeatPtr sfp,SeqLocPtr location_loc,Boolean merge,Int4Ptr frame,Boolean allowTerminator)14638 static SeqLocPtr MYdnaLoc_to_aaLoc(SeqFeatPtr sfp,
14639                                    SeqLocPtr location_loc,
14640                                    Boolean merge,
14641                                    Int4Ptr frame,
14642                                    Boolean allowTerminator)
14643 {
14644     SeqLocPtr aa_loc = NULL, loc;
14645     CdRegionPtr crp;
14646     Int4 product_len, end_pos, frame_offset;
14647     GatherRange gr;
14648     Int4 a_left = 0, a_right, last_aa = -20, aa_from, aa_to;
14649     SeqLocPtr slp, slp1, slp2;
14650     Int2 cmpval;
14651     SeqIdPtr aa_sip;
14652     BioseqPtr bsp;
14653 
14654     if ((sfp == NULL) || (location_loc == NULL)) return aa_loc;
14655     if (sfp->data.choice != 3) return aa_loc;
14656     if (sfp->product == NULL) return aa_loc;
14657 
14658     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
14659     if(crp == NULL) return aa_loc;
14660 
14661     /* each interval of location_loc must be equal to or contained in
14662      * an interval of sfp->location
14663      */
14664     slp1 = SeqLocFindNext (sfp->location, NULL);
14665     slp2 = SeqLocFindNext (location_loc, NULL);
14666     while (slp2 != NULL && slp1 != NULL) {
14667       cmpval = SeqLocCompare (slp2, slp1);
14668       if (cmpval == SLC_A_IN_B || cmpval == SLC_A_EQ_B) {
14669         slp2 = SeqLocFindNext (location_loc, slp2);
14670       } else {
14671         slp1 = SeqLocFindNext (sfp->location, slp1);
14672       }
14673     }
14674     if (slp1 == NULL) return aa_loc;
14675 
14676     aa_sip = SeqLocId(sfp->product);
14677     if (aa_sip == NULL) return aa_loc;
14678     bsp = BioseqLockById(aa_sip);
14679     if (bsp == NULL) return aa_loc;
14680     end_pos = bsp->length - 1;
14681     BioseqUnlock(bsp);
14682 
14683     if(crp->frame == 0)
14684         frame_offset = 0;
14685     else
14686         frame_offset = (Int4)crp->frame-1;
14687 
14688     slp = NULL;
14689     product_len = 0;
14690     loc = NULL;
14691     while ((slp = SeqLocFindNext(sfp->location, slp))!=NULL)
14692     {
14693        if (SeqLocOffset(location_loc, slp, &gr, 0))
14694        {
14695             SeqLocOffset(slp, location_loc, &gr, 0);
14696 
14697             a_left = gr.left + product_len - frame_offset;
14698             a_right = gr.right + product_len - frame_offset;
14699 
14700             aa_from = a_left / 3;
14701             aa_to = a_right / 3;
14702 
14703             if (aa_from < 0)
14704                 aa_from = 0;
14705             if (aa_to > end_pos)
14706                 aa_to = end_pos;
14707 
14708             if (merge)
14709             {
14710                 if (aa_from <= last_aa)  /* overlap due to
14711 codons */
14712                     aa_from = last_aa+1;  /* set up to merge
14713 */
14714             }
14715 
14716             if (aa_from <= aa_to || (allowTerminator && aa_from == aa_to + 1))
14717             {
14718                 if(loc != NULL)
14719                 {
14720                     if(aa_loc == NULL)
14721                         load_fuzz_to_DNA(loc, location_loc, TRUE);
14722                     SeqLocAdd(&aa_loc, loc, merge, FALSE);
14723                 }
14724                 loc = SeqLocIntNew(aa_from, aa_to, 0, aa_sip);
14725                 last_aa = aa_to;
14726             }
14727          }
14728 
14729          product_len += SeqLocLen(slp);
14730     }
14731 
14732     if(loc != NULL)
14733     {
14734         if(aa_loc == NULL)
14735             load_fuzz_to_DNA(loc, location_loc, TRUE);
14736         load_fuzz_to_DNA(loc, location_loc, FALSE);
14737         SeqLocAdd(&aa_loc, loc, merge, FALSE);
14738     }
14739     if (frame != NULL)
14740         *frame = a_left % 3;
14741 
14742     return SeqLocPackage(aa_loc);
14743 }
14744 
14745 //Not used for Autodef or Cleanup
BuildProtLoc(SeqFeatPtr overlapping_cds,SeqLocPtr slp,Int4Ptr frame)14746 NLM_EXTERN SeqLocPtr BuildProtLoc (SeqFeatPtr overlapping_cds, SeqLocPtr slp, Int4Ptr frame)
14747 {
14748   SeqLocPtr tmp_loc, aa_loc = NULL, prot_loc = NULL, last_loc = NULL, next_loc;
14749   Boolean   partial5, partial3;
14750   BioseqPtr prot_bsp;
14751   Boolean   is_ordered = FALSE;
14752   Boolean   first = TRUE;
14753 
14754   prot_bsp = BioseqFindFromSeqLoc (overlapping_cds->product);
14755   if (prot_bsp == NULL) {
14756     return NULL;
14757   }
14758   CheckSeqLocForPartial (slp, &partial5, &partial3);
14759   tmp_loc = SeqLocFindNext (slp, NULL);
14760   while (tmp_loc != NULL) {
14761     if (tmp_loc->choice == SEQLOC_NULL) {
14762       is_ordered = TRUE;
14763     } else {
14764       if (first) {
14765         aa_loc = MYdnaLoc_to_aaLoc (overlapping_cds, tmp_loc, FALSE, frame, FALSE);
14766         first = FALSE;
14767       } else {
14768         aa_loc = MYdnaLoc_to_aaLoc (overlapping_cds, tmp_loc, FALSE, NULL, FALSE);
14769       }
14770     }
14771     if (last_loc == NULL) {
14772       prot_loc = aa_loc;
14773     } else {
14774       last_loc->next = aa_loc;
14775     }
14776     last_loc = aa_loc;
14777     tmp_loc = SeqLocFindNext (slp, tmp_loc);
14778   }
14779   if (prot_loc != NULL && prot_loc->next != NULL) {
14780     tmp_loc = NULL;
14781     for (aa_loc = prot_loc; aa_loc != NULL; aa_loc = next_loc) {
14782       next_loc = aa_loc->next;
14783       aa_loc->next = NULL;
14784 
14785       last_loc = SeqLocMerge (prot_bsp, tmp_loc, aa_loc, FALSE, TRUE, is_ordered);
14786       tmp_loc = SeqLocFree (tmp_loc);
14787       aa_loc = SeqLocFree (aa_loc);
14788       tmp_loc = last_loc;
14789       last_loc = NULL;
14790 
14791       aa_loc = next_loc;
14792     }
14793     prot_loc = tmp_loc;
14794   }
14795   SetSeqLocPartial (prot_loc, partial5, partial3);
14796   return prot_loc;
14797 }
14798 
14799 //Not used for Autodef or Cleanup
ConvertRegionToProtFunc(SeqFeatPtr sfp,Uint2 featdef_to)14800 NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to)
14801 {
14802   BioseqPtr  bsp;
14803   ProtRefPtr prp;
14804   SeqFeatPtr cds;
14805   SeqLocPtr  location;
14806 
14807   if (sfp == NULL || sfp->data.choice != SEQFEAT_REGION)
14808   {
14809     return FALSE;
14810   }
14811 
14812   /* only convert features that are on protein sequences */
14813   bsp = BioseqFindFromSeqLoc (sfp->location);
14814   if (!ISA_aa (bsp->mol))
14815   {
14816     cds = SeqMgrGetOverlappingCDS (sfp->location, NULL);
14817     if (cds == NULL)
14818     {
14819       return FALSE;
14820     }
14821     else
14822     {
14823       location = BuildProtLoc (cds, sfp->location, NULL);
14824       sfp->location = SeqLocFree (sfp->location);
14825       sfp->location = location;
14826     }
14827   }
14828 
14829   prp = ProtRefNew ();
14830   if (prp != NULL)
14831   {
14832     prp->name = ValNodeNew(NULL);
14833     if (prp->name != NULL)
14834     {
14835       /* use region name for protein name */
14836       prp->name->data.ptrvalue = sfp->data.value.ptrvalue;
14837       switch (featdef_to)
14838       {
14839         case FEATDEF_PROT :
14840           prp->processed = 0;
14841           break;
14842         case FEATDEF_preprotein :
14843           prp->processed = 1;
14844           break;
14845         case FEATDEF_mat_peptide_aa :
14846           prp->processed = 2;
14847           break;
14848         case FEATDEF_sig_peptide_aa :
14849           prp->processed = 3;
14850           break;
14851         case FEATDEF_transit_peptide_aa :
14852           prp->processed = 4;
14853           break;
14854         case FEATDEF_propeptide_aa :
14855           prp->processed = 5;
14856           break;
14857         default :
14858           break;
14859       }
14860 
14861       sfp->data.value.ptrvalue = prp;
14862       sfp->data.choice = SEQFEAT_PROT;
14863     }
14864   }
14865   return TRUE;
14866 }
14867 
14868 //Not used for Autodef or Cleanup
GetProteinLocationForNucleotideFeatureConversion(SeqLocPtr nuc_slp,BoolPtr no_cds)14869 NLM_EXTERN SeqLocPtr GetProteinLocationForNucleotideFeatureConversion (SeqLocPtr nuc_slp, BoolPtr no_cds)
14870 {
14871   SeqFeatPtr cds;
14872   SeqMgrFeatContext cds_context;
14873   SeqLocPtr  prot_slp;
14874 
14875   cds = SeqMgrGetOverlappingCDS (nuc_slp, &cds_context);
14876   if (cds == NULL) {
14877     if (no_cds != NULL) {
14878       *no_cds = TRUE;
14879     }
14880     return NULL;
14881   } else if (no_cds != NULL) {
14882     *no_cds = FALSE;
14883   }
14884 
14885   prot_slp = BuildProtLoc (cds, nuc_slp, NULL);
14886   return prot_slp;
14887 }
14888 
14889 
14890 
14891 /*---------------------------------------------------------------------*/
14892 /*                                                                     */
14893 /* ConvertImpToProt () - Convert a given import feature to a    */
14894 /*                           protein feature.                          */
14895 /*                                                                     */
14896 /*    Note : Any of the Import feature's gbquals that can be converted */
14897 /*           to protein fields are caught in the automatic cleanup     */
14898 /*           called during reindexing, so they don't need to be        */
14899 /*           converted here.                                           */
14900 /*                                                                     */
14901 /*---------------------------------------------------------------------*/
14902 //Not used for Autodef or Cleanup
ConvertImpToProtFunc(SeqFeatPtr sfp,Uint2 featdef_to)14903 NLM_EXTERN Boolean ConvertImpToProtFunc
14904 (SeqFeatPtr  sfp,
14905  Uint2       featdef_to)
14906 {
14907   ImpFeatPtr ifp;
14908   SeqFeatPtr cds;
14909   SeqLocPtr  slp;
14910   SeqFeatPtr newSfp;
14911   Int4       frame;
14912   ProtRefPtr prp;
14913   SeqIdPtr   sip;
14914   BioseqPtr  bsp;
14915   SeqMgrFeatContext fcontext;
14916 
14917   if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP)
14918   {
14919     return FALSE;
14920   }
14921   /* Get the Import Feature */
14922 
14923   ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
14924   if (NULL == ifp)
14925   {
14926     return FALSE;
14927   }
14928 
14929   /* Convert the location to a protein location */
14930   cds = SeqMgrGetOverlappingCDS (sfp->location, &fcontext);
14931   if (cds == NULL)
14932   {
14933     return FALSE;
14934   }
14935 
14936   slp = BuildProtLoc (cds, sfp->location, &frame);
14937   if (slp == NULL)
14938   {
14939     return FALSE;
14940   }
14941 
14942   /* Create a new generic feature */
14943 
14944   sip = SeqLocId (cds->product);
14945   if (sip == NULL)
14946   {
14947     slp = SeqLocFree (slp);
14948     return FALSE;
14949   }
14950 
14951   bsp = BioseqLockById (sip);
14952   if (bsp == NULL)
14953   {
14954     slp = SeqLocFree (slp);
14955     return FALSE;
14956   }
14957 
14958   newSfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_PROT, slp);
14959   BioseqUnlock (bsp);
14960   if (newSfp == NULL)
14961   {
14962     slp = SeqLocFree (slp);
14963     return FALSE;
14964   }
14965 
14966   /* Make it into a protein feature */
14967 
14968   prp = ProtRefNew ();
14969   newSfp->data.value.ptrvalue = (Pointer) prp;
14970   if (NULL == prp)
14971   {
14972     slp = SeqLocFree (slp);
14973     newSfp = SeqFeatFree (newSfp);
14974     return FALSE;
14975   }
14976 
14977   switch (featdef_to) {
14978     case FEATDEF_mat_peptide_aa :
14979       prp->processed = 2;
14980       break;
14981     case FEATDEF_sig_peptide_aa :
14982       prp->processed = 3;
14983       break;
14984     case FEATDEF_transit_peptide_aa :
14985       prp->processed = 4;
14986       break;
14987     case FEATDEF_propeptide_aa :
14988       prp->processed = 5;
14989       break;
14990   }
14991 
14992   /* Transfer unchanged fields from old feature */
14993 
14994   newSfp->partial     = sfp->partial;
14995   newSfp->excpt       = sfp->excpt;
14996   newSfp->exp_ev      = sfp->exp_ev;
14997   newSfp->pseudo      = sfp->pseudo;
14998   newSfp->comment     = sfp->comment;
14999   newSfp->qual        = sfp->qual;
15000   newSfp->title       = sfp->title;
15001   newSfp->ext         = sfp->ext;
15002   newSfp->cit         = sfp->cit;
15003   newSfp->xref        = sfp->xref;
15004   newSfp->dbxref      = sfp->dbxref;
15005   newSfp->except_text = sfp->except_text;
15006 
15007   /* Null out pointers to transferred fields from old feature  */
15008   /* so that they don't get deleted when the feature does,     */
15009 
15010   sfp->comment     = NULL;
15011   sfp->qual        = NULL;
15012   sfp->title       = NULL;
15013   sfp->ext         = NULL;
15014   sfp->cit         = NULL;
15015   sfp->xref        = NULL;
15016   sfp->dbxref      = NULL;
15017   sfp->except_text = NULL;
15018 
15019   /* Mark the old feature for deletion */
15020 
15021   sfp->idx.deleteme = TRUE;
15022   return TRUE;
15023 }
15024 
15025 //Not used for Autodef or Cleanup
FindNucleotideLocationForProteinFeatureConversion(SeqLocPtr slp)15026 NLM_EXTERN SeqLocPtr FindNucleotideLocationForProteinFeatureConversion (SeqLocPtr slp)
15027 {
15028   SeqMgrFeatContext context;
15029   SeqFeatPtr cds;
15030   SeqLocPtr  slp_nuc;
15031   Boolean    partial5, partial3;
15032 
15033   cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (slp), &context);
15034   if (NULL == cds)
15035   {
15036     return NULL;
15037   }
15038 
15039   slp_nuc = aaLoc_to_dnaLoc (cds, slp);
15040   if (slp_nuc == NULL)
15041   {
15042     CheckSeqLocForPartial (slp, &partial5, &partial3);
15043     if (partial5 && partial3) {
15044       slp_nuc = AsnIoMemCopy (cds->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
15045     }
15046   }
15047   return slp_nuc;
15048 }
15049 
15050 
15051 /*---------------------------------------------------------------------*/
15052 /*                                                                     */
15053 /* ConvertProtToImp () -                                        */
15054 /*                                                                     */
15055 /*---------------------------------------------------------------------*/
15056 //Not used for Autodef or Cleanup
ConvertProtToImpFunc(SeqFeatPtr sfp,Uint2 featdef_to)15057 NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr  sfp, Uint2 featdef_to)
15058 {
15059   ProtRefPtr    prp;
15060   SeqLocPtr     slp;
15061   ImpFeatPtr    ifp;
15062   CharPtr       name;
15063   CharPtr       ec;
15064   CharPtr       activity;
15065   ValNodePtr    vnp;
15066   GBQualPtr     gbqual = NULL;
15067   GBQualPtr     prevGbq;
15068   GBQualPtr     topOfGbqList = NULL;
15069   DbtagPtr      dbt;
15070   Char          idStr[64];
15071   ObjectIdPtr   oip;
15072   Uint2         entityID;
15073   BioseqPtr     bsp;
15074   SeqAnnotPtr   old_sap, new_sap;
15075   SeqFeatPtr    tmp, tmp_prev;
15076 
15077   /* Make sure that we have a matching peptide feature */
15078 
15079   if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT)
15080   {
15081     return FALSE;
15082   }
15083   entityID = sfp->idx.entityID;
15084 
15085   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
15086   if (NULL == prp)
15087   {
15088     return FALSE;
15089   }
15090 
15091   switch (sfp->idx.subtype) {
15092     case FEATDEF_mat_peptide_aa :
15093       if (2 != prp->processed)
15094         return FALSE;
15095       break;
15096     case FEATDEF_sig_peptide_aa :
15097       if (3 != prp->processed)
15098         return FALSE;
15099       break;
15100     case FEATDEF_transit_peptide_aa :
15101       if (4 != prp->processed)
15102         return FALSE;
15103       break;
15104     case FEATDEF_propeptide_aa :
15105       if (5 != prp->processed)
15106         return FALSE;
15107       break;
15108   }
15109 
15110   /* Convert the location from the protein */
15111   /* to the nucleotide Bioseq.             */
15112 
15113   slp = FindNucleotideLocationForProteinFeatureConversion (sfp->location);
15114   if (NULL == slp)
15115     return FALSE;
15116   sfp->location = SeqLocFree (sfp->location);
15117   sfp->location = slp;
15118   /* move feature to correct annot */
15119   if (sfp->idx.parenttype == OBJ_SEQANNOT
15120       && (old_sap = (SeqAnnotPtr) sfp->idx.parentptr) != NULL
15121       && old_sap->type == 1
15122       && (bsp = BioseqFindFromSeqLoc (sfp->location)) != NULL)
15123   {
15124     tmp_prev = NULL;
15125     tmp = old_sap->data;
15126     while (tmp != sfp) {
15127       tmp_prev = tmp;
15128       tmp = tmp->next;
15129     }
15130     if (tmp != NULL) {
15131       if (tmp_prev == NULL) {
15132         old_sap->data = tmp->next;
15133       } else {
15134         tmp_prev->next = tmp->next;
15135       }
15136       if (old_sap->data == NULL) {
15137         old_sap->idx.deleteme = TRUE;
15138       }
15139       new_sap = bsp->annot;
15140       while (new_sap != NULL && new_sap->type != 1) {
15141         new_sap = new_sap->next;
15142       }
15143       if (new_sap == NULL) {
15144         new_sap = SeqAnnotNew ();
15145         new_sap->type = 1;
15146         new_sap->next = bsp->annot;
15147         bsp->annot = new_sap;
15148       }
15149       sfp->next = new_sap->data;
15150       new_sap->data = sfp;
15151       sfp->idx.parentptr = new_sap;
15152     }
15153   }
15154 
15155 
15156   /* Create a new import feature and */
15157   /* attach it to the feature.       */
15158 
15159   ifp = ImpFeatNew ();
15160   if (NULL == ifp)
15161   {
15162     return FALSE;
15163   }
15164 
15165   /* set key */
15166   ifp->key = StringSave (GetFeatureNameFromFeatureType(featdef_to));
15167 
15168   sfp->data.choice = SEQFEAT_IMP;
15169   sfp->data.value.ptrvalue = (Pointer) ifp;
15170 
15171   /* Store the protein fields as  */
15172   /* gbqual qualifier/value pairs */
15173 
15174   name = NULL;
15175   vnp = prp->name;
15176   if (vnp != NULL)
15177     name = vnp->data.ptrvalue;
15178   if (name == NULL)
15179     name = prp->desc;
15180 
15181   if (name != NULL) {
15182     gbqual = GBQualNew ();
15183     if (NULL == gbqual)
15184       return FALSE;
15185     topOfGbqList = gbqual;
15186     gbqual->qual = StringSave ("product");
15187     gbqual->val = StringSave (name);
15188   }
15189 
15190   prevGbq = gbqual;
15191 
15192   ec = NULL;
15193   vnp = prp->ec;
15194   if (vnp != NULL)
15195     ec = (CharPtr) vnp->data.ptrvalue;
15196 
15197   if (ec != NULL) {
15198     gbqual = GBQualNew ();
15199     if (NULL == gbqual)
15200       return FALSE;
15201     if (prevGbq != NULL) {
15202       prevGbq->next = gbqual;
15203     }
15204     gbqual->qual = StringSave ("EC_number");
15205     gbqual->val = StringSave (ec);
15206   }
15207 
15208   prevGbq = gbqual;
15209 
15210   activity = NULL;
15211   vnp = prp->activity;
15212   if (vnp != NULL)
15213     activity = (CharPtr) vnp->data.ptrvalue;
15214 
15215   if (NULL != activity) {
15216     gbqual = GBQualNew ();
15217     if (NULL == gbqual)
15218       return FALSE;
15219     if (prevGbq != NULL) {
15220       prevGbq->next = gbqual;
15221     }
15222     gbqual->qual = StringSave ("function");
15223     gbqual->val = StringSave (activity);
15224   }
15225 
15226   prevGbq = gbqual;
15227 
15228   for (vnp = prp->db; vnp != NULL; vnp = vnp->next) {
15229     dbt = (DbtagPtr) vnp->data.ptrvalue;
15230     if (NULL == dbt )
15231       continue;
15232     if (! StringHasNoText (dbt->db)) {
15233       gbqual = GBQualNew ();
15234       if (NULL == gbqual)
15235         continue;
15236       if (prevGbq != NULL) {
15237         prevGbq->next = gbqual;
15238       }
15239       oip = dbt->tag;
15240       if (oip->str != NULL && (! StringHasNoText (oip->str))) {
15241         sprintf (idStr, "%s:%s", (CharPtr)dbt->tag, oip->str);
15242         gbqual->qual = StringSave ("db_xref");
15243         gbqual->val = StringSave (idStr);
15244       } else {
15245         sprintf (idStr, "%s:%ld", (CharPtr)dbt->tag, (long) oip->id);
15246         gbqual->qual = StringSave ("db_xref");
15247         gbqual->val = StringSave (idStr);
15248       }
15249       prevGbq = gbqual;
15250     }
15251   }
15252 
15253   /* Insert the new qualifiers in front of any existing ones */
15254 
15255   if (gbqual != NULL) {
15256     gbqual->next = sfp->qual;
15257   }
15258   sfp->qual = topOfGbqList;
15259 
15260   /* Free the obsolete Protein reference */
15261 
15262   ProtRefFree (prp);
15263   return TRUE;
15264 }
15265 
15266 //Not used for Autodef and Cleanup
15267 /* functions for converting from biosource */
SubSourceText(BioSourcePtr biop,Uint1 subtype,BoolPtr found)15268 NLM_EXTERN CharPtr SubSourceText (BioSourcePtr biop, Uint1 subtype, BoolPtr found)
15269 {
15270   Int4 subtype_len = 0;
15271   SubSourcePtr ssp;
15272   CharPtr subtype_txt = NULL;
15273 
15274   if (biop == NULL || biop->subtype == NULL) return NULL;
15275   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
15276     if (ssp->subtype == subtype) {
15277       if (found != NULL) *found = TRUE;
15278       if (!StringHasNoText (ssp->name)) {
15279         subtype_len += StringLen (ssp->name) + 1;
15280       }
15281     }
15282   }
15283   if (subtype_len == 0) return NULL;
15284   subtype_txt = (CharPtr) MemNew (sizeof (Char) * subtype_len);
15285   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
15286     if (ssp->subtype == subtype && !StringHasNoText (ssp->name)) {
15287       if (!StringHasNoText (subtype_txt)) {
15288         StringCat (subtype_txt, ";");
15289       }
15290       StringCat (subtype_txt, ssp->name);
15291     }
15292   }
15293   return subtype_txt;
15294 }
15295 
15296 //Not used for Autodef and Cleanup
OrgModText(BioSourcePtr biop,Uint1 subtype,BoolPtr found)15297 NLM_EXTERN CharPtr OrgModText (BioSourcePtr biop, Uint1 subtype, BoolPtr found)
15298 {
15299   Int4 subtype_len = 0;
15300   OrgModPtr omp;
15301   CharPtr subtype_txt = NULL;
15302 
15303   if (biop == NULL
15304      || biop->org == NULL
15305      || biop->org->orgname == NULL
15306      || biop->org->orgname->mod == NULL) {
15307     return NULL;
15308   }
15309 
15310   for (omp = biop->org->orgname->mod; omp != NULL; omp = omp->next) {
15311     if (omp->subtype == subtype) {
15312       if (found != NULL) *found = TRUE;
15313       if (!StringHasNoText (omp->subname)) {
15314         subtype_len += StringLen (omp->subname) + 1;
15315       }
15316     }
15317   }
15318   if (subtype_len == 0) return NULL;
15319   subtype_txt = (CharPtr) MemNew (sizeof (Char) * subtype_len);
15320   for (omp = biop->org->orgname->mod; omp != NULL; omp = omp->next) {
15321     if (omp->subtype == subtype && !StringHasNoText (omp->subname)) {
15322       if (!StringHasNoText (subtype_txt)) {
15323         StringCat (subtype_txt, ";");
15324       }
15325       StringCat (subtype_txt, omp->subname);
15326     }
15327   }
15328   return subtype_txt;
15329 }
15330 
15331 //Not used for Autodef and Cleanup
NoteText(BioSourcePtr biop,CharPtr comment)15332 NLM_EXTERN CharPtr NoteText (BioSourcePtr biop, CharPtr comment)
15333 {
15334   CharPtr orgmod_note, subsource_note;
15335   Int4    text_len = 0;
15336   CharPtr note_text = NULL;
15337 
15338   orgmod_note = OrgModText(biop, ORGMOD_other, NULL);
15339   if (!StringHasNoText (orgmod_note)) {
15340     text_len += StringLen (orgmod_note) + 1;
15341   }
15342   subsource_note = SubSourceText (biop, SUBSRC_other, NULL);
15343   if (!StringHasNoText (subsource_note)) {
15344     text_len += StringLen (subsource_note) + 1;
15345   }
15346   if (!StringHasNoText (comment)) {
15347     text_len += StringLen (comment) + 1;
15348   }
15349 
15350   if (text_len == 0) return NULL;
15351 
15352   note_text = (CharPtr) MemNew (sizeof(Char) * text_len);
15353   if (!StringHasNoText (orgmod_note)) {
15354     StringCat (note_text, orgmod_note);
15355   }
15356   orgmod_note = MemFree (orgmod_note);
15357   if (!StringHasNoText (subsource_note)) {
15358     if (!StringHasNoText (note_text)) {
15359       StringCat (note_text, ";");
15360     }
15361     StringCat (note_text, subsource_note);
15362   }
15363   subsource_note = MemFree (subsource_note);
15364 
15365   if (!StringHasNoText (comment)) {
15366     if (!StringHasNoText (note_text)) {
15367       StringCat (note_text, ";");
15368     }
15369     StringCat (note_text, comment);
15370   }
15371   return note_text;
15372 }
15373 
15374 //Not used for Autodef and Cleanup
15375 /*---------------------------------------------------------------------*/
15376 /*                                                                     */
15377 /* ConvertBioSrcToRepeatRegion ()                                  */
15378 /*                                                                     */
15379 /* 9/28/2004: Changed to convert all BioSource features with notes     */
15380 /* instead of ones with transposon or insertion_seq qualifiers.        */
15381 /*---------------------------------------------------------------------*/
15382 
ConvertBioSrcToRepeatRegion(SeqFeatPtr sfp,Uint2 featdef_to)15383 NLM_EXTERN Boolean ConvertBioSrcToRepeatRegion (SeqFeatPtr sfp, Uint2 featdef_to)
15384 {
15385   BioSourcePtr  biop;
15386   GBQualPtr     gbqual;
15387   ImpFeatPtr    ifp;
15388   CharPtr       transposon_txt, insertion_seq_txt, note_txt;
15389   Boolean       is_transposon = FALSE, is_insertion_seq = FALSE;
15390 
15391   if (sfp == NULL || sfp->idx.subtype != FEATDEF_BIOSRC) return FALSE;
15392 
15393   biop = (BioSourcePtr) sfp->data.value.ptrvalue;
15394 
15395   transposon_txt = SubSourceText (biop, SUBSRC_transposon_name, &is_transposon);
15396   insertion_seq_txt = SubSourceText (biop, SUBSRC_insertion_seq_name, &is_insertion_seq);
15397   note_txt = NoteText (biop, sfp->comment);
15398 
15399 
15400   /* Create a new Import Feature */
15401 
15402   ifp = ImpFeatNew ();
15403   if (NULL == ifp)
15404     return FALSE;
15405   ifp->key = StringSave ("repeat_region");
15406 
15407   /* Copy relevant info from the BioSource */
15408   /* feature to the Import feature.        */
15409 
15410 
15411   /* Delete the old BioSource feature */
15412 
15413   sfp->data.value.ptrvalue = BioSourceFree (biop);
15414 
15415   /* Attach the new Import feature in its place */
15416 
15417   sfp->data.choice = SEQFEAT_IMP;
15418   sfp->data.value.ptrvalue = ifp;
15419 
15420   if (is_transposon) {
15421     gbqual = GBQualNew ();
15422     gbqual->qual = StringSave ("mobile_element");
15423     gbqual->val = (CharPtr) MemNew (sizeof(Char) * (StringLen (transposon_txt) + 12));
15424     StringCat (gbqual->val, "transposon:");
15425     StringCat (gbqual->val, transposon_txt);
15426     gbqual->next = sfp->qual;
15427     sfp->qual = gbqual;
15428   }
15429   transposon_txt = MemFree (transposon_txt);
15430 
15431   if (is_insertion_seq) {
15432     gbqual = GBQualNew ();
15433     gbqual->qual = StringSave ("mobile_element");
15434     gbqual->val = (CharPtr) MemNew (sizeof(Char) * (StringLen (insertion_seq_txt) + 19));
15435     StringCat (gbqual->val, "insertion sequence:");
15436     StringCat (gbqual->val, insertion_seq_txt);
15437     gbqual->next = sfp->qual;
15438     sfp->qual = gbqual;
15439   }
15440   insertion_seq_txt = MemFree (insertion_seq_txt);
15441 
15442   sfp->comment = MemFree (sfp->comment);
15443   sfp->comment = note_txt;
15444   return TRUE;
15445 }
15446 
15447 //Not used for Autodef and Cleanup
15448 NLM_EXTERN Boolean
ConvertNonPseudoCDSToMiscFeat(SeqFeatPtr sfp,Boolean viral)15449 ConvertNonPseudoCDSToMiscFeat
15450 (SeqFeatPtr sfp,
15451  Boolean viral)
15452 {
15453   CdRegionPtr          cdrp;
15454   ImpFeatPtr           ifp;
15455   CharPtr              noteStr;
15456   BioseqPtr            protBsp;
15457   SeqMgrFeatContext    protContext;
15458   CharPtr              protName = NULL;
15459   SeqFeatPtr           protSfp;
15460   ProtRefPtr           prp;
15461   ValNodePtr           vnp;
15462   Int4                 note_len = 0;
15463   CharPtr              viral_fmt = "nonfunctional %s due to mutation";
15464   CharPtr              similar_fmt = "similar to %s";
15465 
15466   if (sfp == NULL
15467       || sfp->data.choice != SEQFEAT_CDREGION
15468       || sfp->product == NULL)
15469   {
15470     return FALSE;
15471   }
15472 
15473   /* Get the CD region part of the feature, and */
15474   /* the associated protein bioseq.             */
15475 
15476   cdrp = (CdRegionPtr) sfp->data.value.ptrvalue;
15477   protBsp = BioseqFindFromSeqLoc (sfp->product);
15478 
15479   if (protBsp == NULL) return FALSE;
15480 
15481   /* Convert the CDS feature to a misc_feat */
15482 
15483   CdRegionFree (cdrp);
15484   sfp->data.value.ptrvalue = NULL;
15485 
15486   ifp = ImpFeatNew ();
15487   if (NULL == ifp) return FALSE;
15488   ifp->key = StringSave ("misc_feature");
15489 
15490   sfp->data.choice = SEQFEAT_IMP;
15491   sfp->data.value.ptrvalue = (Pointer) ifp;
15492 
15493   /* Add a name key to the misc_feature */
15494 
15495   protSfp = SeqMgrGetBestProteinFeature (protBsp, &protContext);
15496   if (protSfp != NULL)
15497   {
15498     prp = (ProtRefPtr) protSfp->data.value.ptrvalue;
15499 
15500     if (prp != NULL)
15501     {
15502       note_len = StringLen (sfp->comment) + StringLen (prp->desc) + 5;
15503 
15504       vnp = prp->name;
15505       if (NULL != vnp)
15506       {
15507         protName = (CharPtr) vnp->data.ptrvalue;
15508         if (NULL != protName)
15509         {
15510           if (viral) {
15511             note_len += StringLen (viral_fmt) + StringLen (protName);
15512           } else {
15513             note_len += StringLen (similar_fmt) + StringLen (protName);
15514           }
15515         }
15516       }
15517       noteStr = (CharPtr) MemNew (sizeof (Char) * note_len);
15518 
15519       if (NULL != protName) {
15520         if (viral) {
15521           sprintf (noteStr, viral_fmt, protName);
15522         } else {
15523           sprintf (noteStr, similar_fmt, protName);
15524         }
15525       }
15526       if (!StringHasNoText (prp->desc)) {
15527         if (!StringHasNoText (noteStr)) {
15528           StringCat (noteStr, "; ");
15529         }
15530         StringCat (noteStr, prp->desc);
15531       }
15532       if (!StringHasNoText (sfp->comment)) {
15533         if (!StringHasNoText (noteStr)) {
15534           StringCat (noteStr, "; ");
15535         }
15536         StringCat (noteStr, sfp->comment);
15537       }
15538       sfp->comment = MemFree (sfp->comment);
15539       sfp->comment = noteStr;
15540     }
15541   }
15542 
15543   /* set the subtype to zero so that it will be reindexed */
15544   sfp->idx.subtype = 0;
15545   return TRUE;
15546 }
15547 
15548 //Not used for Autodef and Cleanup
RnaTypeFromFeatdef(Uint2 featdef)15549 NLM_EXTERN Uint1 RnaTypeFromFeatdef (Uint2 featdef)
15550 {
15551   switch (featdef)
15552   {
15553     case FEATDEF_preRNA:
15554       return 1;
15555       break;
15556     case FEATDEF_mRNA:
15557       return 2;
15558       break;
15559     case FEATDEF_tRNA:
15560       return 3;
15561       break;
15562     case FEATDEF_rRNA:
15563       return 4;
15564       break;
15565     case FEATDEF_snRNA:
15566       return 8;
15567       break;
15568     case FEATDEF_scRNA:
15569       return 8;
15570       break;
15571     case FEATDEF_snoRNA:
15572       return 8;
15573       break;
15574     case FEATDEF_ncRNA:
15575       return 8;
15576       break;
15577     case FEATDEF_tmRNA:
15578       return 9;
15579       break;
15580     case FEATDEF_otherRNA:
15581     default:
15582       return 255;
15583       break;
15584   }
15585 }
15586 
15587 //Not used for Autodef and Cleanup
ConvertRegionToRNAFunc(SeqFeatPtr sfp,Uint2 featdef_to)15588 NLM_EXTERN Boolean ConvertRegionToRNAFunc
15589 (SeqFeatPtr sfp,
15590  Uint2      featdef_to)
15591 {
15592   RnaRefPtr  rrp;
15593   CharPtr    str, new_comment;
15594   Boolean    add_to_comment = FALSE;
15595   Int4       len;
15596 
15597   if (sfp == NULL || sfp->data.choice != SEQFEAT_REGION)
15598   {
15599     return FALSE;
15600   }
15601 
15602   str = (CharPtr) sfp->data.value.ptrvalue;
15603   rrp = RnaRefFromLabel (featdef_to, str, &add_to_comment);
15604 
15605   sfp->data.choice = SEQFEAT_RNA;
15606   sfp->data.value.ptrvalue = (Pointer) rrp;
15607 
15608   if (add_to_comment) {
15609     if (sfp->comment == NULL) {
15610       sfp->comment = str;
15611       str = NULL;
15612     } else {
15613       len = StringLen (sfp->comment) + StringLen (str) + 3;
15614       new_comment = MemNew (sizeof (Char) * len);
15615       sprintf (new_comment, "%s; %s", sfp->comment, str);
15616       sfp->comment = MemFree (sfp->comment);
15617       str = MemFree (str);
15618       sfp->comment = new_comment;
15619     }
15620   } else {
15621     str = MemFree (str);
15622   }
15623   return TRUE;
15624 
15625   return TRUE;
15626 }
15627 
15628 //Not used for Autodef and Cleanup
GetImportFeatureName(Uint2 featdef_key)15629 NLM_EXTERN CharPtr GetImportFeatureName (Uint2 featdef_key)
15630 {
15631   FeatDefPtr  curr;
15632   Uint1       key;
15633   CharPtr     label = NULL;
15634 
15635   curr = FeatDefFindNext (NULL, &key, &label, FEATDEF_ANY, TRUE);
15636   while (curr != NULL)
15637   {
15638     if (featdef_key == key)
15639     {
15640       return curr->typelabel;
15641     }
15642     curr = FeatDefFindNext (curr, &key, &label, FEATDEF_ANY, TRUE);
15643   }
15644   return NULL;
15645 }
15646 
15647 //Not used for Autodef and Cleanup
ConvertRegionToImpFunc(SeqFeatPtr sfp,Uint2 featdef_to)15648 NLM_EXTERN Boolean ConvertRegionToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
15649 {
15650   GBQualPtr          gbqual;
15651   ImpFeatPtr         ifp;
15652   CharPtr            str;
15653   CharPtr            featname_to;
15654 
15655   if (sfp == NULL || sfp->data.choice != SEQFEAT_REGION) return FALSE;
15656   featname_to = GetImportFeatureName (featdef_to);
15657   ifp = ImpFeatNew ();
15658   if (NULL == ifp)
15659     return FALSE;
15660 
15661   str = (CharPtr) sfp->data.value.ptrvalue;
15662   sfp->data.choice = SEQFEAT_IMP;
15663   sfp->data.value.ptrvalue = (Pointer) ifp;
15664   if (featname_to == NULL)
15665   {
15666     ifp->key = StringSave ("misc_feature");
15667   }
15668   else
15669   {
15670     ifp->key = StringSave (featname_to);
15671   }
15672 
15673   if (! StringHasNoText (str)) {
15674     gbqual = GBQualNew ();
15675     if (gbqual != NULL) {
15676       gbqual->qual = StringSave ("note");
15677       gbqual->val = str;
15678       gbqual->next = sfp->qual;
15679       sfp->qual = gbqual;
15680     }
15681   }
15682   return TRUE;
15683 }
15684 
15685 //Not used for Autodef and Cleanup
ConvertImpToImpFunc(SeqFeatPtr sfp,Uint2 featdef_to)15686 NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to)
15687 {
15688   ImpFeatPtr         ifp;
15689   CharPtr            featname;
15690 
15691   if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP) return FALSE;
15692 
15693   ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
15694   if (NULL == ifp)
15695     return FALSE;
15696 
15697   featname = GetImportFeatureName (featdef_to);
15698   ifp->key = MemFree (ifp->key);
15699   if (featname == NULL)
15700   {
15701     ifp->key = StringSave ("misc_feature");
15702   }
15703   else
15704   {
15705     ifp->key = StringSave (featname);
15706   }
15707   sfp->idx.subtype = 0;
15708 
15709   return TRUE;
15710 }
15711 
15712 //Not used for Autodef and Cleanup
OkToAddToImpFeat(CharPtr val,Uint2 featdef_to)15713 static Boolean OkToAddToImpFeat (CharPtr val, Uint2 featdef_to)
15714 {
15715   if (StringHasNoText (val)) {
15716     return FALSE;
15717   } else if (featdef_to == FEATDEF_D_loop && StringsAreEquivalent(val, "D-Loop")) {
15718     return FALSE;
15719   } else {
15720     return TRUE;
15721   }
15722 }
15723 
15724 //Not used for Autodef and Cleanup
ConvertGeneToImpFeatFunc(SeqFeatPtr sfp,Uint2 featdef_to)15725 NLM_EXTERN Boolean ConvertGeneToImpFeatFunc
15726 (SeqFeatPtr sfp,
15727  Uint2      featdef_to)
15728 {
15729   ImpFeatPtr  ifp;
15730   CharPtr     new_comment;
15731   GeneRefPtr  grp;
15732   Int4        comment_len = 0;
15733   CharPtr     featname;
15734 
15735   if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE)
15736   {
15737     return FALSE;
15738   }
15739   ifp = ImpFeatNew ();
15740   if (NULL == ifp)
15741   {
15742     return FALSE;
15743   }
15744 
15745   grp = (GeneRefPtr) sfp->data.value.ptrvalue;
15746   if (grp != NULL)
15747   {
15748     if (OkToAddToImpFeat (grp->locus, featdef_to))
15749     {
15750       comment_len += StringLen (grp->locus) + 2;
15751     }
15752     if (OkToAddToImpFeat (grp->desc, featdef_to))
15753     {
15754       comment_len += StringLen (grp->desc) + 2;
15755     }
15756   }
15757   if (comment_len == 0)
15758   {
15759     /* nothing to add to comment */
15760   }
15761   else
15762   {
15763     /* add one for terminating NULL */
15764     comment_len++;
15765     if (!StringHasNoText (sfp->comment))
15766     {
15767       comment_len += StringLen (sfp->comment) + 2;
15768     }
15769 
15770     new_comment = (CharPtr) MemNew (sizeof (Char) * comment_len);
15771     /* NOTE - I don't have to check for grp == NULL because
15772      * comment_len would only have been > 0 if grp had existed
15773      * and had nonempty fields.
15774      */
15775     if (OkToAddToImpFeat (grp->desc, featdef_to))
15776     {
15777       StringCat (new_comment, grp->desc);
15778       StringCat (new_comment, "; ");
15779     }
15780     if (OkToAddToImpFeat (grp->locus, featdef_to))
15781     {
15782       StringCat (new_comment, grp->locus);
15783       StringCat (new_comment, "; ");
15784     }
15785     if (!StringHasNoText (sfp->comment))
15786     {
15787       StringCat (new_comment, sfp->comment);
15788       StringCat (new_comment, "; ");
15789     }
15790     /* remove last semicolon */
15791     new_comment[StringLen (new_comment) - 2] = 0;
15792     sfp->comment = MemFree (sfp->comment);
15793     sfp->comment = new_comment;
15794   }
15795 
15796   sfp->data.value.ptrvalue =
15797     GeneRefFree ((GeneRefPtr) sfp->data.value.ptrvalue);
15798   sfp->data.choice = SEQFEAT_IMP;
15799   sfp->data.value.ptrvalue = (Pointer) ifp;
15800 
15801   featname = GetImportFeatureName (featdef_to);
15802   ifp->key = MemFree (ifp->key);
15803   if (featname == NULL)
15804   {
15805     ifp->key = StringSave ("misc_feature");
15806   }
15807   else
15808   {
15809     ifp->key = StringSave (featname);
15810   }
15811 
15812   return TRUE;
15813 }
15814 
15815 
15816 
15817 //Not used for Autodef and Cleanup
15818 /* For mat-peptide instantiation */
MakeMatPeptideProductId(SeqLocPtr mat_peptide_loc)15819 static SeqIdPtr MakeMatPeptideProductId (SeqLocPtr mat_peptide_loc)
15820 {
15821   ObjectIdPtr oip;
15822   SeqIdPtr    sip;
15823   Char        id_str[500];
15824   Int4        len;
15825   BioseqPtr   bsp;
15826 
15827   if (mat_peptide_loc == NULL) return NULL;
15828 
15829   bsp = BioseqFindFromSeqLoc (mat_peptide_loc);
15830   if (bsp == NULL) return NULL;
15831   sip = bsp->id;
15832   while (sip != NULL && sip->choice != SEQID_OTHER) {
15833     sip = sip->next;
15834   }
15835   if (sip == NULL) {
15836     sip = bsp->id;
15837     while (sip != NULL && sip->choice != SEQID_GENBANK) {
15838       sip = sip->next;
15839     }
15840   }
15841   if (sip == NULL) {
15842     sip = SeqIdFindBest (bsp->id, 0);
15843   }
15844   SeqIdWrite (sip, id_str, PRINTID_TEXTID_ACC_ONLY, sizeof (id_str) - 41);
15845   len = StringLen (id_str);
15846   sprintf (id_str + len, ":%d-%d", SeqLocStart (mat_peptide_loc) + 1, SeqLocStop (mat_peptide_loc) + 1);
15847 
15848   oip = ObjectIdNew ();
15849   oip->str = StringSave (id_str);
15850   sip = ValNodeNew (NULL);
15851   sip->choice = SEQID_LOCAL;
15852   sip->data.ptrvalue = oip;
15853   return sip;
15854 }
15855 
15856 //Not used for Autodef and Cleanup
/*
 * Feature-visit callback that creates a separate protein Bioseq for a
 * mat-peptide feature that does not yet have a product.
 *
 * sfp   feature being visited; ignored unless its idx.subtype is
 *       FEATDEF_mat_peptide_aa and its product is NULL
 * data  callback user data; not referenced in this function
 *
 * Steps: copy the residues covered by sfp->location out of the parent
 * protein into a new raw Bioseq, give it a local ID from
 * MakeMatPeptideProductId, wrap it in a SeqEntry added under the top parent,
 * set it as the feature product, then add a MolInfo descriptor, a protein
 * feature (a copy of the original ProtRef with processed cleared) and,
 * when NewCreateDefLineBuf produces text, a title descriptor.
 *
 * NOTE(review): several results are used without a NULL check (sep when
 * passed to CreateNewDescriptor, sfp_mat, prp_mat, and sdp for the title).
 */
static void InstantiateMatPeptideProductForProteinFeature (SeqFeatPtr sfp, Pointer data)
{
  BioseqPtr mat_bsp, prot_bsp;
  SeqEntryPtr master, sep, old;
  Int4        i, len;
  Int2        residue;
  SeqLocPtr   slp;
  ByteStorePtr src, dst;
  ProtRefPtr   prp_orig, prp_mat;
  SeqFeatPtr   sfp_mat;
  SeqDescrPtr  sdp;
  MolInfoPtr   mip;
  Boolean      partial5, partial3;
  Char         defline_buf[1024];

  /* only mat-peptide features that have no product yet are processed */
  if (sfp == NULL || sfp->idx.subtype != FEATDEF_mat_peptide_aa || sfp->product != NULL) {
    return;
  }

  prp_orig = sfp->data.value.ptrvalue;

  /* the parent protein the mat-peptide is annotated on */
  prot_bsp = BioseqFindFromSeqLoc (sfp->location);
  if (prot_bsp == NULL) {
    return;
  }
  master = GetBestTopParentForData (sfp->idx.entityID, prot_bsp);
  if (master == NULL) return;

  /* NOTE(review): the parent's seq_data is read as a ByteStore of residues */
  /* without checking seq_data_type - presumably ncbieaa; confirm.          */
  src = (ByteStorePtr) prot_bsp->seq_data;

  mat_bsp = BioseqNew ();
  if (mat_bsp == NULL) {
     return;
  }
  mat_bsp->mol = Seq_mol_aa;
  mat_bsp->repr = Seq_repr_raw;
  mat_bsp->seq_data_type = Seq_code_ncbieaa;
  mat_bsp->length = SeqLocLen (sfp->location);
  dst = BSNew (0);
  mat_bsp->seq_data = (SeqDataPtr) dst;
  BSSeek (dst, 0, SEEK_SET);

  /* copy the residues of each interval of the location, in order */
  for (slp = SeqLocFindNext (sfp->location, NULL);
       slp != NULL;
       slp = SeqLocFindNext (sfp->location, slp)) {
    BSSeek (src, SeqLocStart (slp), SEEK_SET);
    len = SeqLocLen (slp);
    for (i = 0; i < len; i++) {
      residue = BSGetByte (src);
      BSPutByte (dst, residue);
    }
  }

  /* the ID is built and the Bioseq indexed with the scope set to master; */
  /* the previous scope is restored afterwards                            */
  old = SeqEntrySetScope (master);

  /*mat_bsp->id = MakeNewProteinSeqId (sfp->location, NULL); */
  mat_bsp->id = MakeMatPeptideProductId (sfp->location);
  SeqMgrAddToBioseqIndex (mat_bsp);
  SeqEntrySetScope (old);
  /* wrap the new Bioseq in its own SeqEntry */
  sep = SeqEntryNew ();
  if (sep != NULL) {
    sep->choice = 1;
    sep->data.ptrvalue = (Pointer) mat_bsp;
    SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) mat_bsp, sep);
  }
  SetSeqFeatProduct (sfp, mat_bsp);
  if (sep != NULL) {
    AddSeqEntryToSeqEntry (master, sep, TRUE);
  }

  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);

  /* set molinfo for new protein sequence */
  sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (sdp != NULL) {
    mip = MolInfoNew ();
    sdp->data.ptrvalue = (Pointer) mip;
    if (mip != NULL) {
      mip->biomol = 8;
      mip->tech = 13;
      /* completeness code follows the partial flags of the location */
      if (partial5 && partial3) {
        mip->completeness = 5;
      } else if (partial5) {
        mip->completeness = 3;
      } else if (partial3) {
        mip->completeness = 4;
      /*
      } else if (partial) {
        mip->completeness = 2;
      */
      } else {
        mip->completeness = 0;
      }
    }
  }


  /* create protein feature for mat_bsp */
  sfp_mat = CreateNewFeature (SeqMgrGetSeqEntryForData (mat_bsp), NULL,
                          SEQFEAT_PROT, NULL);

  /* the new feature spans the whole new sequence and keeps the partials */
  sfp_mat->location = SeqLocIntNew (0, mat_bsp->length - 1, Seq_strand_plus, SeqIdDup (mat_bsp->id));
  SetSeqLocPartial (sfp_mat->location, partial5, partial3);
  if (partial5 || partial3) {
    sfp_mat->partial = TRUE;
  }

  /* copy of the original ProtRef, with the processed field cleared */
  prp_mat = AsnIoMemCopy (prp_orig, (AsnReadFunc) ProtRefAsnRead, (AsnWriteFunc) ProtRefAsnWrite);
  prp_mat->processed = 0;
  sfp_mat->data.value.ptrvalue = prp_mat;
  if (sfp->comment != NULL) {
    sfp_mat->comment = StringSave (sfp->comment);
  }

  /* add title */
  SeqMgrIndexFeatures (0, mat_bsp);
  if (NewCreateDefLineBuf (NULL, mat_bsp, defline_buf, sizeof (defline_buf), FALSE, FALSE)
      && !StringHasNoText (defline_buf)) {
    sdp = CreateNewDescriptor (sep, Seq_descr_title);
    sdp->data.ptrvalue = StringSave (defline_buf);
  }

}
15980 
15981 //Not used for Autodef and Cleanup
ExtraCDSCreationActions(SeqFeatPtr cds,SeqEntryPtr parent_sep)15982 NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
15983 {
15984   ByteStorePtr       bs;
15985   CharPtr            prot, ptr;
15986   BioseqPtr          bsp;
15987   Char               ch;
15988   Int4               i;
15989   SeqEntryPtr        psep, nsep;
15990   MolInfoPtr         mip;
15991   ValNodePtr         vnp, descr;
15992   SeqFeatPtr         prot_sfp;
15993   ProtRefPtr         prp;
15994   Boolean            partial5, partial3;
15995 
15996   if (cds == NULL) return;
15997 
15998   CheckSeqLocForPartial (cds->location, &partial5, &partial3);
15999 
16000   /* Create corresponding protein sequence data for the CDS */
16001 
16002   bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
16003   if (NULL == bs)
16004     return;
16005 
16006   prot = BSMerge (bs, NULL);
16007   bs = BSFree (bs);
16008   if (NULL == prot)
16009     return;
16010 
16011   ptr = prot;
16012   ch = *ptr;
16013   while (ch != '\0') {
16014     *ptr = TO_UPPER (ch);
16015     ptr++;
16016     ch = *ptr;
16017   }
16018   i = StringLen (prot);
16019   if (i > 0 && prot [i - 1] == '*') {
16020     prot [i - 1] = '\0';
16021   }
16022   bs = BSNew (1000);
16023   if (bs != NULL) {
16024     ptr = prot;
16025     BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
16026   }
16027 
16028   /* Create the product protein Bioseq */
16029 
16030   bsp = BioseqNew ();
16031   if (NULL == bsp)
16032     return;
16033 
16034   bsp->repr = Seq_repr_raw;
16035   bsp->mol = Seq_mol_aa;
16036   bsp->seq_data_type = Seq_code_ncbieaa;
16037   bsp->seq_data = (SeqDataPtr) bs;
16038   bsp->length = BSLen (bs);
16039   bs = NULL;
16040   bsp->id = MakeNewProteinSeqId (cds->location, NULL);
16041   SeqMgrAddToBioseqIndex (bsp);
16042 
16043   /* Create a new SeqEntry for the Prot Bioseq */
16044 
16045   psep = SeqEntryNew ();
16046   if (NULL == psep)
16047     return;
16048 
16049   psep->choice = 1;
16050   psep->data.ptrvalue = (Pointer) bsp;
16051   SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);
16052 
16053   /* Add a descriptor to the protein Bioseq */
16054 
16055   mip = MolInfoNew ();
16056   if (NULL == mip)
16057     return;
16058 
16059   mip->biomol = 8;
16060   mip->tech = 8;
16061   if (partial5 && partial3) {
16062     mip->completeness = 5;
16063   } else if (partial5) {
16064     mip->completeness = 3;
16065   } else if (partial3) {
16066     mip->completeness = 4;
16067   }
16068   vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
16069   if (NULL == vnp)
16070     return;
16071 
16072   vnp->data.ptrvalue = (Pointer) mip;
16073 
16074   /**/
16075 
16076   descr = ExtractBioSourceAndPubs (parent_sep);
16077 
16078   AddSeqEntryToSeqEntry (parent_sep, psep, TRUE);
16079   nsep = FindNucSeqEntry (parent_sep);
16080   ReplaceBioSourceAndPubs (parent_sep, descr);
16081   SetSeqFeatProduct (cds, bsp);
16082 
16083   prp = ProtRefNew ();
16084 
16085   if (prp != NULL) {
16086     prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
16087     if (prot_sfp != NULL) {
16088       prot_sfp->data.value.ptrvalue = (Pointer) prp;
16089       SetSeqLocPartial (prot_sfp->location, partial5, partial3);
16090       prot_sfp->partial = (partial5 || partial3);
16091     }
16092   }
16093 }
16094 
16095 //Not used for Autodef and Cleanup
GetProtFeature(BioseqPtr protbsp)16096 NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
16097 {
16098   SeqMgrFeatContext fcontext;
16099   SeqAnnotPtr sap;
16100   SeqFeatPtr prot_sfp;
16101   ProtRefPtr prp;
16102 
16103   if (protbsp == NULL) return NULL;
16104 
16105   prot_sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
16106   if (prot_sfp == NULL) {
16107     sap = protbsp->annot;
16108     while (sap != NULL && prot_sfp == NULL) {
16109       if (sap->type == 1) {
16110         prot_sfp = sap->data;
16111         while (prot_sfp != NULL
16112                && (prot_sfp->data.choice != SEQFEAT_PROT
16113                    || (prp = prot_sfp->data.value.ptrvalue) == NULL
16114                    || prp->processed != 0)) {
16115           prot_sfp = prot_sfp->next;
16116         }
16117       }
16118       sap = sap->next;
16119     }
16120   }
16121   return prot_sfp;
16122 }
16123 
16124 //Not used for Autodef and Cleanup
ConvertMiscFeatToGene(SeqFeatPtr sfp)16125 NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp)
16126 {
16127   GeneRefPtr grp;
16128   CharPtr    cp;
16129 
16130   if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) {
16131     return FALSE;
16132   }
16133   sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue);
16134   grp = GeneRefNew ();
16135   sfp->data.value.ptrvalue = grp;
16136   sfp->data.choice = SEQFEAT_GENE;
16137   sfp->idx.subtype = 0;
16138 
16139   if (!StringHasNoText (sfp->comment)) {
16140     cp = StringChr (sfp->comment, ';');
16141     if (cp != NULL) {
16142       *cp = 0;
16143     }
16144     grp->locus = StringSave (sfp->comment);
16145     if (cp != NULL) {
16146       cp = StringSave (cp + 1);
16147     }
16148     sfp->comment = MemFree (sfp->comment);
16149     sfp->comment = cp;
16150   }
16151   return TRUE;
16152 }
16153 
16154 //Not used for Autodef and Cleanup
ConvertMiscFeatToCodingRegion(SeqFeatPtr sfp)16155 NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp)
16156 {
16157   BioseqPtr bsp, prot_bsp;
16158   SeqFeatPtr prot;
16159   ProtRefPtr prp;
16160 
16161   if (sfp == NULL || sfp->idx.subtype != FEATDEF_misc_feature) {
16162     return FALSE;
16163   }
16164 
16165   sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue);
16166   sfp->data.value.ptrvalue = CdRegionNew ();
16167   sfp->data.choice = SEQFEAT_CDREGION;
16168   sfp->idx.subtype = 0;
16169 
16170   bsp = BioseqFindFromSeqLoc (sfp->location);
16171   if (bsp != NULL) {
16172     ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
16173     if (!StringHasNoText (sfp->comment)) {
16174       prot_bsp = BioseqFindFromSeqLoc (sfp->product);
16175       prot = GetProtFeature (prot_bsp);
16176       if (prot != NULL) {
16177         prp = prot->data.value.ptrvalue;
16178         ValNodeAddPointer (&prp->name, 0, sfp->comment);
16179         sfp->comment = NULL;
16180       }
16181     }
16182   }
16183 
16184   return TRUE;
16185 }
16186 
16187 //Not used for Autodef and Cleanup
ConvertmRNAToCodingRegion(SeqFeatPtr sfp)16188 NLM_EXTERN Boolean ConvertmRNAToCodingRegion (SeqFeatPtr sfp)
16189 {
16190   BioseqPtr bsp, prot_bsp;
16191   SeqFeatPtr prot;
16192   ProtRefPtr prp;
16193   RnaRefPtr  rrp;
16194   CharPtr    product = NULL;
16195 
16196   if (sfp == NULL || sfp->idx.subtype != FEATDEF_mRNA) {
16197     return FALSE;
16198   }
16199 
16200   rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
16201   if (rrp != NULL && rrp->ext.choice == 1) {
16202       product = StringSave(rrp->ext.value.ptrvalue);
16203   }
16204 
16205   sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
16206   sfp->data.value.ptrvalue = CdRegionNew ();
16207   sfp->data.choice = SEQFEAT_CDREGION;
16208   sfp->idx.subtype = 0;
16209 
16210   bsp = BioseqFindFromSeqLoc (sfp->location);
16211   if (bsp != NULL) {
16212     ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
16213     prot_bsp = BioseqFindFromSeqLoc (sfp->product);
16214     prot = GetProtFeature (prot_bsp);
16215     if (prot != NULL) {
16216       prp = prot->data.value.ptrvalue;
16217       if (!StringHasNoText(product)) {
16218         ValNodeAddPointer (&prp->name, 0, product);
16219         product = NULL;
16220       }
16221       if (!StringHasNoText (sfp->comment)) {
16222         ValNodeAddPointer (&prp->name, 0, sfp->comment);
16223         sfp->comment = NULL;
16224       }
16225     }
16226   }
16227   product = MemFree (product);
16228 
16229   return TRUE;
16230 }
16231 
16232 //Not used for Autodef and Cleanup
ConverttRNAToGene(SeqFeatPtr sfp)16233 NLM_EXTERN Boolean ConverttRNAToGene(SeqFeatPtr sfp)
16234 {
16235   RnaRefPtr  rrp;
16236   tRNAPtr    trp;
16237   GeneRefPtr grp;
16238 
16239   if (sfp == NULL || sfp->idx.subtype != FEATDEF_tRNA
16240       || (rrp = (RnaRefPtr)(sfp->data.value.ptrvalue)) == NULL
16241       || rrp->ext.choice != 2
16242       || (trp = (tRNAPtr)(rrp->ext.value.ptrvalue)) == NULL)
16243    {
16244     return FALSE;
16245   }
16246 
16247   grp = GeneRefNew();
16248   grp->desc = GetRNARefProductString(rrp, NULL);
16249   sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
16250   sfp->data.value.ptrvalue = grp;
16251   sfp->data.choice = SEQFEAT_GENE;
16252   sfp->idx.subtype = 0;
16253   return TRUE;
16254 }
16255 
16256 //Not used for Autodef and Cleanup
/*
 * InstantiateMatPeptideProducts
 *
 * Runs InstantiateMatPeptideProductForProteinFeature on every feature that
 * VisitFeaturesInSep visits in sep, with NULL as the callback user data.
 */
NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep)
{
  VisitFeaturesInSep (sep, NULL, InstantiateMatPeptideProductForProteinFeature);
}
16261 
16262 
16263 //Not used for Autodef and Cleanup
GetTSAIDDB(BioseqPtr bsp)16264 NLM_EXTERN CharPtr GetTSAIDDB (BioseqPtr bsp)
16265 {
16266   CharPtr db = NULL, cp;
16267   Int4    gpid = 0;
16268   CharPtr id_fmt = "gpid:%d";
16269   CharPtr bpid_fmt = "bpid:%s";
16270 
16271   gpid = GetGenomeProjectID (bsp);
16272   if (gpid <= 0) {
16273     cp = GetBioProjectIdFromBioseq(bsp, NULL);
16274 	  if (cp != NULL) {
16275 	    db = MemNew (sizeof (Char) * (StringLen (bpid_fmt) + StringLen (cp)));
16276 	    sprintf (db, bpid_fmt, cp);
16277 	    cp = MemFree (cp);
16278 	  }
16279   } else {
16280 	  db = MemNew (sizeof (Char) * (StringLen (id_fmt) + 15));
16281     sprintf (db, id_fmt, gpid);
16282   }
16283   return db;
16284 }
16285 
16286 
16287 //Not used for Autodef and Cleanup
/*
 * Bioseq-visit callback: for a non-protein Bioseq that has a local ID,
 * builds a general ID with db "uoguelph" and the same tag (string if the
 * local ID has one, otherwise its integer id) and passes both to
 * ReplaceSeqIdWithSeqId.
 *
 * bsp   Bioseq being visited
 * data  top-level SeqEntry handed to ReplaceSeqIdWithSeqId; the callback
 *       does nothing when it is NULL
 */
static void ConvertLocalIdsToBarcodeIdsCallback (BioseqPtr bsp, Pointer data)
{
  SeqEntryPtr     top;
  SeqIdPtr        local_id;
  SeqIdPtr        general_id;
  ObjectIdPtr     local_oid;
  DbtagPtr        tag;

  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL) {
    return;
  }
  top = (SeqEntryPtr) data;

  /* find the first local ID on the Bioseq */
  local_id = bsp->id;
  while (local_id != NULL && local_id->choice != SEQID_LOCAL) {
    local_id = local_id->next;
  }
  if (local_id == NULL) return;

  local_oid = local_id->data.ptrvalue;
  if (local_oid == NULL) return;

  tag = DbtagNew ();
  tag->db = StringSave ("uoguelph");
  tag->tag = ObjectIdNew ();
  if (local_oid->str != NULL) {
    tag->tag->str = StringSave (local_oid->str);
  } else {
    tag->tag->id = local_oid->id;
  }

  general_id = ValNodeNew (NULL);
  general_id->choice = SEQID_GENERAL;
  general_id->data.ptrvalue = tag;
  ReplaceSeqIdWithSeqId (local_id, general_id, top);
}
16323 
16324 
16325 //Not used for Autodef and Cleanup
/*
 * ConvertLocalIdsToBarcodeIds
 *
 * Runs ConvertLocalIdsToBarcodeIdsCallback on every Bioseq that
 * VisitBioseqsInSep visits in sep.  sep itself is passed as the callback
 * user data, where it serves as the top-level entry for the ID replacement.
 */
NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep)
{
  VisitBioseqsInSep (sep, sep, ConvertLocalIdsToBarcodeIdsCallback);
}
16330 //LCOV_EXCL_STOP
16331 
GetDeflinePosForFieldName(CharPtr name)16332 NLM_EXTERN Int4 GetDeflinePosForFieldName(CharPtr name)
16333 {
16334     Int4    i, rval = -1;
16335 
16336     if (StringICmp(name, "specimen-voucher") == 0) {
16337         rval = DEFLINE_POS_Specimen_voucher;
16338     } else {
16339         for (i = 0; i < numDefLineModifiers; i++) {
16340             if (StringICmp(name, DefLineModifiers[i].name) == 0) {
16341                 rval = i;
16342                 break;
16343             }
16344         }
16345     }
16346     return rval;
16347 }
16348 
16349 
16350 //LCOV_EXCL_START
16351 //Not used in Autodef or Cleanup
GetDeflinePosForFieldType(ValNodePtr field)16352 NLM_EXTERN Int4 GetDeflinePosForFieldType (ValNodePtr field)
16353 {
16354   Int4    rval = -1;
16355   CharPtr name;
16356 
16357   name = SummarizeFieldType (field);
16358   rval = GetDeflinePosForFieldName(name);
16359   name = MemFree (name);
16360   return rval;
16361 }
16362 
16363 //Not used in Autodef or Cleanup
/*
 * Removes and frees every node of *orig_list for which
 * GetDeflinePosForFieldType returns a negative value.  *orig_list is
 * updated when leading nodes are removed.  Does nothing when orig_list is
 * NULL or the list is empty.
 */
static void RemoveUnusedFieldTypes (FieldTypePtr PNTR orig_list)
{
  ValNodePtr PNTR link;
  ValNodePtr      node;

  if (orig_list == NULL) {
    return;
  }

  /* link always points at the pointer that leads to the current node */
  link = orig_list;
  while (*link != NULL) {
    node = *link;
    if (GetDeflinePosForFieldType (node) < 0) {
      /* unhook the node first so that only this one node is freed */
      *link = node->next;
      node->next = NULL;
      node = FieldTypeFree (node);
    } else {
      link = &(node->next);
    }
  }
}
16386 
16387 //Not used in Autodef or Cleanup
RemoveMatchingFieldType(FieldTypePtr PNTR orig_list,FieldTypePtr match)16388 static Boolean RemoveMatchingFieldType (FieldTypePtr PNTR orig_list, FieldTypePtr match)
16389 {
16390   ValNodePtr vnp, prev = NULL, vnp_next;
16391   Boolean    rval = FALSE;
16392 
16393   if (orig_list == NULL || *orig_list == NULL || match == NULL) {
16394     return rval;
16395   }
16396 
16397   for (vnp = *orig_list; vnp != NULL && !rval; vnp = vnp_next) {
16398     vnp_next = vnp->next;
16399     if (CompareFieldTypes (vnp, match) == 0) {
16400       if (prev == NULL) {
16401         *orig_list = vnp->next;
16402       } else {
16403         prev->next = vnp->next;
16404       }
16405       vnp->next = NULL;
16406       vnp = FieldTypeFree (vnp);
16407       rval = TRUE;
16408     } else {
16409       prev = vnp;
16410     }
16411   }
16412   return rval;
16413 }
16414 
16415 //Not used in Autodef or Cleanup
ListHasMatchingFieldType(FieldTypePtr list,FieldTypePtr match)16416 static Boolean ListHasMatchingFieldType (FieldTypePtr list, FieldTypePtr match)
16417 {
16418   Boolean rval = FALSE;
16419 
16420   if (list == NULL || match == NULL) return rval;
16421 
16422   while (list != NULL && !rval) {
16423     if (CompareFieldTypes (list, match) == 0) {
16424       rval = TRUE;
16425     } else {
16426       list = list->next;
16427     }
16428   }
16429   return rval;
16430 }
16431 
16432 
/* Priority table of source qualifiers consulted by
 * CompareFieldTypeByImportance.  The table is scanned from the first entry
 * to the last, so a qualifier listed earlier sorts ahead of one listed
 * later, and any listed qualifier sorts ahead of unlisted ones.
 */
static Int4 DefLineFieldTypeSortOrder [] = {
  Source_qual_strain,
  Source_qual_isolate,
  Source_qual_clone,
  Source_qual_haplotype,
  Source_qual_cultivar,
  Source_qual_specimen_voucher,
  Source_qual_ecotype,
  Source_qual_type,
  Source_qual_serotype,
  Source_qual_authority,
  Source_qual_breed
};
16446 
16447 //Not used in Autodef or Cleanup
/* Orders two FieldType nodes by their importance for a definition line.
 *
 * NULL handling: two NULLs are equal; a NULL field sorts after a non-NULL one.
 *
 * When both fields are source qualifiers, DefLineFieldTypeSortOrder is
 * scanned from the highest priority down: the field whose qualifier is found
 * first sorts first (-1 if it is field1, 1 if it is field2).  If both fields
 * carry the same prioritized qualifier, or neither is in the table, or either
 * qualifier payload is NULL, the result is delegated to CompareFieldTypes.
 *
 * The "same qualifier" case matters: returning -1 for it (as the table scan
 * alone would) makes the comparison non-reflexive and asymmetric, so sorting
 * and duplicate detection that rely on a 0 result for equal items
 * (e.g. SortQualCombo used with ValNodeUnique) would never see two identical
 * field lists as equal.
 */
static int CompareFieldTypeByImportance (FieldTypePtr field1, FieldTypePtr field2)
{
  Int4 index, num_defline_qual_sort_order;
  SourceQualChoicePtr scp1, scp2;
  Boolean in_slot1, in_slot2;

  if (field1 == NULL && field2 == NULL) {
    return 0;
  }
  if (field1 == NULL) {
    return 1;
  }
  if (field2 == NULL) {
    return -1;
  }

  if (field1->choice == FieldType_source_qual && field2->choice == FieldType_source_qual) {
    scp1 = field1->data.ptrvalue;
    scp2 = field2->data.ptrvalue;
    /* SortQualCombo calls this directly, without the payload checks that
     * SortFieldTypeByImportance performs, so guard against NULL here. */
    if (scp1 != NULL && scp2 != NULL) {
      num_defline_qual_sort_order =
        (Int4) (sizeof (DefLineFieldTypeSortOrder) / sizeof (DefLineFieldTypeSortOrder [0]));
      for (index = 0; index < num_defline_qual_sort_order; index++)
      {
        in_slot1 = (Boolean) (scp1->data.intvalue == DefLineFieldTypeSortOrder [ index ]);
        in_slot2 = (Boolean) (scp2->data.intvalue == DefLineFieldTypeSortOrder [ index ]);
        if (in_slot1 && in_slot2) {
          /* same priority slot: neither outranks the other */
          break;
        }
        if (in_slot1) return -1;
        if (in_slot2) return 1;
      }
    }
  }
  return CompareFieldTypes (field1, field2);
}
16475 
16476 //Not used in Autodef or Cleanup
SortFieldTypeByImportance(VoidPtr ptr1,VoidPtr ptr2)16477 static int LIBCALLBACK SortFieldTypeByImportance (
16478   VoidPtr ptr1,
16479   VoidPtr ptr2
16480 )
16481 {
16482   ValNodePtr  vnp1;
16483   ValNodePtr  vnp2;
16484 
16485   if (ptr1 == NULL && ptr2 == NULL) return 0;
16486 
16487   if (ptr1 == NULL && ptr2 != NULL) return -1;
16488   if (ptr1 != NULL && ptr2 == NULL) return 1;
16489 
16490   vnp1 = *((ValNodePtr PNTR) ptr1);
16491   vnp2 = *((ValNodePtr PNTR) ptr2);
16492   if (vnp1 == NULL || vnp2 == NULL) return 0;
16493   if (vnp1->data.ptrvalue == NULL || vnp2->data.ptrvalue == NULL) return 0;
16494 
16495   return CompareFieldTypeByImportance (vnp1, vnp2);
16496 }
16497 
16498 
/* Working record for one BioSource while searching for a set of qualifiers
 * that makes its description unique.
 */
typedef struct uniqbiosource {
  BioSourcePtr biop;             /* source being described; borrowed -- UniqBioSourceFree does not free it */
  ValNodePtr   available_fields; /* FieldType list of qualifiers not yet used; freed by UniqBioSourceFree */
  ValNodePtr   strings;          /* allocated value strings added so far, in order of addition */
} UniqBioSourceData, PNTR UniqBioSourcePtr;
16504 //Not used in Autodef or Cleanup
AddQualToUniqBioSource(UniqBioSourcePtr u,FieldTypePtr field)16505 static Boolean AddQualToUniqBioSource (
16506   UniqBioSourcePtr u,
16507   FieldTypePtr     field
16508 )
16509 {
16510   CharPtr             val = NULL, tmp;
16511   SourceQualChoicePtr q;
16512   Boolean             rval = FALSE;
16513 
16514   if (u == NULL || field == NULL) return FALSE;
16515   if (field->choice != FieldType_source_qual) return FALSE;
16516   q = (SourceQualChoicePtr) field->data.ptrvalue;
16517   if (q == NULL || q->choice != SourceQualChoice_textqual) return FALSE;
16518 
16519   val = GetSourceQualFromBioSource (u->biop, q, NULL);
16520   if (StringHasNoText (val)) {
16521     val = MemFree (val);
16522   } else if (q->data.intvalue == Source_qual_specimen_voucher && StringNICmp (val, "personal:", 9) == 0) {
16523     tmp = StringSave (val + 9);
16524     val = MemFree (val);
16525     val = tmp;
16526   } else if (IsNonTextSourceQual (q->data.intvalue)) {
16527     val = MemFree (val);
16528     val = StringSave (GetSourceQualName (q->data.intvalue));
16529   }
16530   if (val != NULL) {
16531     ValNodeAddPointer (&(u->strings), 0, val);
16532     rval = TRUE;
16533   }
16534   RemoveMatchingFieldType (&(u->available_fields), field);
16535 
16536   u->available_fields = ValNodeSort (u->available_fields, SortFieldTypeByImportance);
16537   return rval;
16538 }
16539 
16540 //Not used in Autodef or Cleanup
UniqBioSourceNew(BioSourcePtr biop)16541 static UniqBioSourcePtr UniqBioSourceNew (BioSourcePtr biop)
16542 {
16543   UniqBioSourcePtr u;
16544 
16545   if (biop == NULL) return NULL;
16546   u = (UniqBioSourcePtr) MemNew (sizeof (UniqBioSourceData));
16547   u->biop = biop;
16548   u->available_fields = GetSourceQualFieldListFromBioSource (u->biop);
16549   u->strings = NULL;
16550 
16551   /* add tax name as first string */
16552   AddQualToUniqBioSource (u, u->available_fields);
16553   RemoveUnusedFieldTypes (&(u->available_fields));
16554 
16555   return u;
16556 }
16557 
16558 //Not used in Autodef or Cleanup
/* Releases a UniqBioSource record: its field list, its string list (nodes
 * and data) and the record itself.  The BioSource it points at is left
 * alone.  Returns the result of MemFree for a non-NULL record, and NULL for
 * a NULL argument.
 */
static UniqBioSourcePtr UniqBioSourceFree (UniqBioSourcePtr u)
{
  if (u == NULL) {
    return u;
  }
  u->available_fields = FieldTypeListFree (u->available_fields);
  u->strings = ValNodeFreeData (u->strings);
  return MemFree (u);
}
16568 
16569 //Not used in Autodef or Cleanup
/* Duplicates a UniqBioSource record.  The field list and every string are
 * copied; the BioSource pointer is shared with the original.  Returns NULL
 * for a NULL argument.
 */
static UniqBioSourcePtr UniqBioSourceCopy (UniqBioSourcePtr u)
{
  UniqBioSourcePtr dup;
  ValNodePtr       str_node;

  if (u == NULL) {
    return NULL;
  }

  dup = (UniqBioSourcePtr) MemNew (sizeof (UniqBioSourceData));
  dup->biop = u->biop;
  dup->available_fields = FieldTypeListCopy (u->available_fields);

  str_node = u->strings;
  while (str_node != NULL) {
    ValNodeAddPointer (&(dup->strings), 0, StringSave (str_node->data.ptrvalue));
    str_node = str_node->next;
  }
  return dup;
}
16584 
16585 //Not used in Autodef or Cleanup
16586 /* The CompareOrganismDescriptors function compares the contents of the
16587  * lists of strings for each BioSrcDesc item.
16588  * The function returns:
16589  *     -1 if org1 < org2
16590  *      0 if org1 = org2
16591  *      1 if org1 > org2
16592  */
CompareUniqBioSource(UniqBioSourcePtr org1,UniqBioSourcePtr org2)16593 static int CompareUniqBioSource (
16594   UniqBioSourcePtr org1,
16595   UniqBioSourcePtr org2
16596 )
16597 {
16598   ValNodePtr vnp1, vnp2;
16599   int cmpval;
16600 
16601   vnp1 = org1->strings;
16602   vnp2 = org2->strings;
16603 
16604   while (vnp1 != NULL && vnp2 != NULL)
16605   {
16606     cmpval = StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
16607     if (cmpval != 0) return cmpval;
16608 
16609     vnp1 = vnp1->next;
16610     vnp2 = vnp2->next;
16611   }
16612   if (vnp1 == NULL && vnp2 == NULL)
16613   {
16614     return 0;
16615   }
16616   else if (vnp1 != NULL && vnp2 == NULL)
16617   {
16618     return 1;
16619   }
16620   else
16621   {
16622     return -1;
16623   }
16624 }
16625 
16626 //Not used in Autodef or Cleanup
RemoveFieldFromUniqBioSource(UniqBioSourcePtr u,FieldTypePtr field)16627 static Boolean RemoveFieldFromUniqBioSource (UniqBioSourcePtr u, FieldTypePtr field)
16628 {
16629   Boolean rval = FALSE;
16630 
16631   if (u != NULL) {
16632     rval = RemoveMatchingFieldType (&(u->available_fields), field);
16633   }
16634   return rval;
16635 }
16636 
16637 //Not used in Autodef or Cleanup
/* Frees a ValNode list whose payloads are UniqBioSource records: each
 * payload goes through UniqBioSourceFree and each node is released
 * individually.  Always returns NULL.
 */
static ValNodePtr UniqBioSourceListFree (ValNodePtr list)
{
  ValNodePtr node;

  while ((node = list) != NULL) {
    list = node->next;
    /* detach first so ValNodeFree releases only this node */
    node->next = NULL;
    node->data.ptrvalue = UniqBioSourceFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return list;
}
16651 
16652 //Not used in Autodef or Cleanup
/* Returns a deep copy of a list of UniqBioSource records, preserving order.
 * Entries whose payload is NULL or whose BioSource pointer is NULL are
 * skipped, so the copy can be shorter than the original.
 */
static ValNodePtr UniqBioSourceListCopy (ValNodePtr orig)
{
  ValNodePtr       head = NULL, tail = NULL, node;
  UniqBioSourcePtr src;

  for (; orig != NULL; orig = orig->next) {
    src = (UniqBioSourcePtr) orig->data.ptrvalue;
    if (src == NULL || src->biop == NULL) {
      continue;
    }
    /* ValNodeNew appends to the list that starts at tail */
    node = ValNodeNew (tail);
    node->choice = 0;
    node->data.ptrvalue = UniqBioSourceCopy (src);
    if (head == NULL) {
      head = node;
    }
    tail = node;
  }
  return head;
}
16673 
16674 //Not used in Autodef or Cleanup
SortUniqBioSource(VoidPtr ptr1,VoidPtr ptr2)16675 static int LIBCALLBACK SortUniqBioSource (VoidPtr ptr1, VoidPtr ptr2)
16676 
16677 {
16678   ValNodePtr    vnp1, vnp2;
16679 
16680   if (ptr1 != NULL && ptr2 != NULL) {
16681     vnp1 = *((ValNodePtr PNTR) ptr1);
16682     vnp2 = *((ValNodePtr PNTR) ptr2);
16683     if (vnp1 != NULL && vnp2 != NULL) {
16684       return CompareUniqBioSource (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
16685     }
16686   }
16687   return 0;
16688 }
16689 
16690 //Not used in Autodef or Cleanup
/* Sorts a list of UniqBioSource records with SortUniqBioSource and returns
 * the new head of the list.
 */
static ValNodePtr UniqBioSourceListSort (ValNodePtr orig)
{
  return ValNodeSort (orig, SortUniqBioSource);
}
16696 
16697 //Not used in Autodef or Cleanup
AddQualToUniqBioSourceList(ValNodePtr list,FieldTypePtr field)16698 static Boolean AddQualToUniqBioSourceList (ValNodePtr list, FieldTypePtr field)
16699 {
16700   Boolean rval = FALSE;
16701   while (list != NULL) {
16702     rval |= AddQualToUniqBioSource (list->data.ptrvalue, field);
16703     list = list->next;
16704   }
16705   return rval;
16706 }
16707 
16708 //Not used in Autodef or Cleanup
RemoveFieldFromUniqBioSourceList(ValNodePtr list,FieldTypePtr field)16709 static Boolean RemoveFieldFromUniqBioSourceList (ValNodePtr list, FieldTypePtr field)
16710 {
16711   Boolean rval = FALSE;
16712 
16713   while (list != NULL) {
16714     rval |= RemoveFieldFromUniqBioSource (list->data.ptrvalue, field);
16715     list = list->next;
16716   }
16717   return rval;
16718 }
16719 
16720 
16721 /* The UniqBioSrcGrp structure contains a list of UniqBioSrc items
16722  * for which the contents of the descriptive strings list are identical,
16723  * i.e., all the organisms in the group would have the same description
16724  * if you used the modifiers used to generate this list of strings.
16725  * The structure also contains the number of organisms in the list
16726  * so that it will be easy to tell that the UniqBioSrcGrp now contains a
16727  * single organism with a unique description.
16728  */
typedef struct uniqbiosrcgrp {
  Int4 num_biop;         /* number of records in biop_list; kept in step by ReGroupUniqBioSrcGrpList */
  ValNodePtr biop_list;  /* list of UniqBioSourcePtr payloads; owned and freed by UniqBioSrcGrpFree */
} UniqBioSrcGrpData, PNTR UniqBioSrcGrpPtr;
16733 
16734 //Not used in Autodef or Cleanup
UniqBioSrcGrpNew(ValNodePtr biop_list)16735 static UniqBioSrcGrpPtr UniqBioSrcGrpNew (ValNodePtr biop_list)
16736 {
16737   UniqBioSrcGrpPtr g;
16738 
16739   g = (UniqBioSrcGrpPtr) MemNew (sizeof (UniqBioSrcGrpData));
16740   g->num_biop = ValNodeLen (biop_list);
16741   g->biop_list = UniqBioSourceListCopy (biop_list);
16742   return g;
16743 }
16744 
16745 //Not used in Autodef or Cleanup
/* Releases a group together with the UniqBioSource records it holds.
 * Returns the result of MemFree for a non-NULL group, and NULL for a NULL
 * argument.
 */
static UniqBioSrcGrpPtr UniqBioSrcGrpFree (UniqBioSrcGrpPtr g)
{
  if (g == NULL) {
    return g;
  }
  g->biop_list = UniqBioSourceListFree (g->biop_list);
  return MemFree (g);
}
16754 
16755 //Not used in Autodef or Cleanup
/* Returns a deep copy of orig, built by UniqBioSrcGrpNew from orig's record
 * list; NULL in gives NULL out.
 */
static UniqBioSrcGrpPtr UniqBioSrcGrpCopy (UniqBioSrcGrpPtr orig)
{
  return (orig == NULL) ? NULL : UniqBioSrcGrpNew (orig->biop_list);
}
16767 
16768 //Not used in Autodef or Cleanup
AddQualToUniqBioSrcGrp(UniqBioSrcGrpPtr g,FieldTypePtr field)16769 static Boolean AddQualToUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr field)
16770 {
16771   Boolean rval = FALSE;
16772 
16773   if (g != NULL) {
16774     rval = AddQualToUniqBioSourceList (g->biop_list, field);
16775     if (rval) {
16776       g->biop_list = UniqBioSourceListSort (g->biop_list);
16777     }
16778   }
16779   return rval;
16780 }
16781 
16782 //Not used in Autodef or Cleanup
RemoveFieldFromUniqBioSrcGrp(UniqBioSrcGrpPtr g,FieldTypePtr field)16783 static Boolean RemoveFieldFromUniqBioSrcGrp (UniqBioSrcGrpPtr g, FieldTypePtr field)
16784 {
16785   Boolean rval = FALSE;
16786 
16787   if (g != NULL) {
16788     rval = RemoveFieldFromUniqBioSourceList (g->biop_list, field);
16789   }
16790   return rval;
16791 }
16792 
16793 //Not used in Autodef or Cleanup
GetAllPresentQualsForGroup(UniqBioSrcGrpPtr g)16794 static FieldTypePtr GetAllPresentQualsForGroup (UniqBioSrcGrpPtr g)
16795 {
16796   ValNodePtr vnp;
16797   FieldTypePtr match_list = NULL, ft, ft_next, ft_prev;
16798   UniqBioSourcePtr u;
16799 
16800   if (g == NULL || g->num_biop < 2) {
16801     return NULL;
16802   }
16803 
16804   u = g->biop_list->data.ptrvalue;
16805   match_list = FieldTypeListCopy (u->available_fields);
16806   for (vnp = g->biop_list->next; vnp != NULL && match_list != NULL; vnp = vnp->next) {
16807     u = vnp->data.ptrvalue;
16808     ft = match_list;
16809     ft_prev = NULL;
16810     while (ft != NULL) {
16811       ft_next = ft->next;
16812       if (ListHasMatchingFieldType (u->available_fields, ft)) {
16813         ft_prev = ft;
16814       } else{
16815         if (ft_prev == NULL) {
16816           match_list = ft->next;
16817         } else {
16818           ft_prev->next = ft->next;
16819         }
16820         ft->next = NULL;
16821         ft = FieldTypeFree (ft);
16822       }
16823       ft = ft_next;
16824     }
16825   }
16826   return match_list;
16827 }
16828 
16829 //Not used in Autodef or Cleanup
GetAllQualsForGroup(UniqBioSrcGrpPtr g)16830 static FieldTypePtr GetAllQualsForGroup (UniqBioSrcGrpPtr g)
16831 {
16832   ValNodePtr vnp, tmp;
16833   FieldTypePtr field_list_head = NULL;
16834   FieldTypePtr field_list_tail = NULL;
16835   UniqBioSourcePtr u;
16836 
16837   if (g == NULL || g->num_biop < 2) {
16838     return NULL;
16839   }
16840 
16841   for (vnp = g->biop_list; vnp != NULL; vnp = vnp->next) {
16842     u = vnp->data.ptrvalue;
16843     if (u != NULL && u->available_fields != NULL) {
16844       tmp = ValNodeLink (&field_list_tail, FieldTypeListCopy (u->available_fields));
16845       if (field_list_head == NULL) {
16846         field_list_head = tmp;
16847       }
16848       if (tmp != NULL) {
16849         while (tmp->next != NULL) {
16850           tmp = tmp->next;
16851         }
16852       }
16853       field_list_tail = tmp;
16854     }
16855   }
16856   SortUniqueFieldTypeList (&field_list_head);
16857   return field_list_head;
16858 }
16859 
16860 //Not used in Autodef or Cleanup
/* Frees a ValNode list whose payloads are UniqBioSrcGrp records: each
 * payload goes through UniqBioSrcGrpFree and each node is released
 * individually.  Always returns NULL.
 */
static ValNodePtr UniqBioSrcGrpListFree (ValNodePtr list)
{
  ValNodePtr node;

  while ((node = list) != NULL) {
    list = node->next;
    /* detach first so ValNodeFree releases only this node */
    node->next = NULL;
    node->data.ptrvalue = UniqBioSrcGrpFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return list;
}
16874 
16875 //Not used in Autodef or Cleanup
/* Returns a deep copy of a list of UniqBioSrcGrp records, preserving order.
 * Nodes with a NULL payload are skipped.
 */
static ValNodePtr UniqBioSrcGrpListCopy (ValNodePtr orig)
{
  ValNodePtr       head = NULL, tail = NULL, node;
  UniqBioSrcGrpPtr src;

  for (; orig != NULL; orig = orig->next) {
    src = (UniqBioSrcGrpPtr) orig->data.ptrvalue;
    if (src == NULL) {
      continue;
    }
    /* ValNodeNew appends to the list that starts at tail */
    node = ValNodeNew (tail);
    node->choice = 0;
    node->data.ptrvalue = UniqBioSrcGrpCopy (src);
    if (head == NULL) {
      head = node;
    }
    tail = node;
  }
  return head;
}
16896 
16897 //Not used in Autodef or Cleanup
16898 /* NOTE - we want to sort groups from most biops to least biops */
SortUniqBioSrcGrp(VoidPtr ptr1,VoidPtr ptr2)16899 static int LIBCALLBACK SortUniqBioSrcGrp (VoidPtr ptr1, VoidPtr ptr2)
16900 
16901 {
16902   ValNodePtr    vnp1, vnp2;
16903   UniqBioSrcGrpPtr g1, g2;
16904   int              rval = 0;
16905 
16906   if (ptr1 != NULL && ptr2 != NULL) {
16907     vnp1 = *((ValNodePtr PNTR) ptr1);
16908     vnp2 = *((ValNodePtr PNTR) ptr2);
16909     if (vnp1 != NULL && vnp2 != NULL && vnp1->data.ptrvalue != NULL && vnp2->data.ptrvalue != NULL) {
16910       g1 = (UniqBioSrcGrpPtr) vnp1->data.ptrvalue;
16911       g2 = (UniqBioSrcGrpPtr) vnp2->data.ptrvalue;
16912       if (g1->num_biop > g2->num_biop) {
16913         rval = -1;
16914       } else if (g1->num_biop < g2->num_biop) {
16915         rval = 1;
16916       }
16917     }
16918   }
16919   return rval;
16920 }
16921 
16922 //Not used in Autodef or Cleanup
/* Sorts a list of UniqBioSrcGrp records with SortUniqBioSrcGrp (largest
 * group first) and returns the new head of the list.
 */
static ValNodePtr BioSrcGrpListSort (ValNodePtr orig)
{
  return ValNodeSort (orig, SortUniqBioSrcGrp);
}
16928 
16929 //Not used in Autodef or Cleanup
RemoveFieldFromUniqBioSrcGrpList(ValNodePtr list,FieldTypePtr field)16930 static Boolean RemoveFieldFromUniqBioSrcGrpList (ValNodePtr list, FieldTypePtr field)
16931 {
16932   Boolean rval = FALSE;
16933 
16934   while (list != NULL) {
16935     rval |= RemoveFieldFromUniqBioSrcGrp (list->data.ptrvalue, field);
16936     list = list->next;
16937   }
16938   return rval;
16939 }
16940 
16941 //Not used in Autodef or Cleanup
/* Splits groups in place so that every group again contains only records
 * with identical descriptions.
 *
 * For each group, the record list is walked while adjacent records compare
 * equal (CompareUniqBioSource == 0).  At the first difference the remainder
 * of the record list is moved into a new group, which is inserted into the
 * group list directly after the current one.  Because the new group is the
 * next node visited, it is split further in the same way if needed.
 *
 * This relies on each group's records already being sorted so that equal
 * descriptions are adjacent (AddQualToUniqBioSrcGrp sorts after adding).
 * NOTE(review): group payloads are dereferenced without a NULL check here,
 * unlike FindMaxOrgsInUniqBioSrcGrpList -- confirm lists never hold NULL
 * groups.
 */
static void ReGroupUniqBioSrcGrpList (ValNodePtr list)
{
  ValNodePtr list_next, vnp;
  UniqBioSrcGrpPtr g, g2;

  while (list != NULL) {
    g = (UniqBioSrcGrpPtr) list->data.ptrvalue;
    vnp = g->biop_list;
    /* advance to the last record of the leading run of equal descriptions */
    while (vnp != NULL && vnp->next != NULL
           && CompareUniqBioSource (vnp->data.ptrvalue, vnp->next->data.ptrvalue) == 0) {
      vnp = vnp->next;
    }
    if (vnp != NULL && vnp->next != NULL) {
      /* start from an empty group and hand it the tail of the record list
       * (ownership moves; nothing is copied) */
      g2 = UniqBioSrcGrpNew (NULL);
      g2->biop_list = vnp->next;
      g2->num_biop = ValNodeLen (vnp->next);
      vnp->next = NULL;
      g->num_biop -= g2->num_biop;
      /* insert the new group right after the current one */
      list_next = ValNodeNew (NULL);
      list_next->data.ptrvalue = g2;
      list_next->next = list->next;
      list->next = list_next;
    }
    list = list->next;
  }
}
16968 
16969 //Not used in Autodef or Cleanup
FindMaxOrgsInUniqBioSrcGrpList(ValNodePtr list)16970 static Int4 FindMaxOrgsInUniqBioSrcGrpList (ValNodePtr list)
16971 {
16972   Int4 max = 0;
16973   UniqBioSrcGrpPtr g;
16974 
16975   while (list != NULL) {
16976     g = (UniqBioSrcGrpPtr) list->data.ptrvalue;
16977     if (g != NULL && g->num_biop > max) {
16978       max = g->num_biop;
16979     }
16980     list = list->next;
16981   }
16982   return max;
16983 }
16984 
16985 //Not used in Autodef or Cleanup
CountUniqueOrgsInUniqBioSrcGrpList(ValNodePtr list)16986 static Int4 CountUniqueOrgsInUniqBioSrcGrpList (ValNodePtr list)
16987 {
16988   Int4 count = 0;
16989   UniqBioSrcGrpPtr g;
16990 
16991   while (list != NULL) {
16992     g = (UniqBioSrcGrpPtr) list->data.ptrvalue;
16993     if (g != NULL && g->num_biop == 1) {
16994       count++;
16995     }
16996     list = list->next;
16997   }
16998   return count;
16999 }
17000 
17001 //Not used in Autodef or Cleanup
AddQualToUniqBioSrcGrpList(ValNodePtr list,FieldTypePtr field)17002 static Boolean AddQualToUniqBioSrcGrpList (ValNodePtr list, FieldTypePtr field)
17003 {
17004   Boolean    rval = FALSE;
17005   ValNodePtr vnp;
17006 
17007   vnp = list;
17008   while (vnp != NULL) {
17009     rval |= AddQualToUniqBioSrcGrp (vnp->data.ptrvalue, field);
17010     vnp = vnp->next;
17011   }
17012   if (rval) {
17013     /* regroup */
17014     ReGroupUniqBioSrcGrpList (list);
17015   }
17016   return rval;
17017 }
17018 
17019 
/* One candidate combination of qualifiers together with the grouping of
 * BioSources it produces.  The counters are recomputed from group_list by
 * QualComboNew and AddQualToQualCombo.
 */
typedef struct qualcombo {
  Int4         num_groups;        /* length of group_list */
  Int4         num_mods;          /* number of qualifiers added so far */
  Int4         max_orgs_in_group; /* largest num_biop of any group */
  Int4         num_unique_orgs;   /* number of groups with exactly one record */
  FieldTypePtr field_list;        /* qualifiers in this combination, sorted by importance */
  ValNodePtr   group_list;        /* list of UniqBioSrcGrpPtr payloads; owned by the combo */
} QualComboData, PNTR QualComboPtr;
17028 
17029 //Not used in Autodef or Cleanup
17030 /* This function creates a new ModifierCombination item using the supplied
17031  * OrgGroup list.  It calculates the number of groups, maximum number of
17032  * organisms in any one group, and number of unique organisms.
17033  * Initially there are no modifiers.
17034  */
QualComboNew(ValNodePtr grp_list)17035 static QualComboPtr QualComboNew (ValNodePtr grp_list)
17036 {
17037   QualComboPtr newm;
17038 
17039   newm = (QualComboPtr) MemNew (sizeof (QualComboData));
17040   if (newm == NULL) return NULL;
17041 
17042   newm->num_mods = 0;
17043   newm->field_list = NULL;
17044 
17045   /* copy groups */
17046   newm->group_list = UniqBioSrcGrpListCopy (grp_list);
17047 
17048   newm->max_orgs_in_group = FindMaxOrgsInUniqBioSrcGrpList (newm->group_list);
17049   newm->num_unique_orgs = CountUniqueOrgsInUniqBioSrcGrpList (newm->group_list);
17050   newm->num_groups = ValNodeLen (newm->group_list);
17051 
17052   return newm;
17053 }
17054 
17055 //Not used in Autodef or Cleanup
17056 /* The CopyQualCombo creates a copy of a QualCombo item.
17057  * This includes creating a copy of the number and list of modifiers
17058  * and a copy of the number and list of OrgGroups, as well as copying the
17059  * maximum number of organisms in any one group and the number of unique
17060  * organism descriptions produced by this combination of modifiers.
17061  */
QualComboCopy(QualComboPtr m)17062 static QualComboPtr QualComboCopy (
17063   QualComboPtr m
17064 )
17065 {
17066   QualComboPtr newm;
17067 
17068   newm = QualComboNew (m->group_list);
17069   if (newm == NULL) return NULL;
17070 
17071   newm->field_list = FieldTypeListCopy (m->field_list);
17072   newm->num_mods = m->num_mods;
17073 
17074   return newm;
17075 }
17076 
17077 //Not used in Autodef or Cleanup
17078 /* This function frees the memory associated with a list of
17079  * ModifierCombination items.
17080  */
QualComboFree(QualComboPtr m)17081 static QualComboPtr QualComboFree (
17082   QualComboPtr m
17083 )
17084 {
17085   if (m != NULL) {
17086     m->group_list = UniqBioSrcGrpListFree (m->group_list);
17087     m->field_list = FieldTypeListFree (m->field_list);
17088     m = MemFree (m);
17089   }
17090   return m;
17091 }
17092 
17093 //Not used in Autodef or Cleanup
/* Debugging aid: walks every group, every record in each group and every
 * available field of each record, reading the field's integer value into a
 * local.  Nothing is printed or returned; presumably intended for inspection
 * under a debugger.
 * NOTE(review): no NULL checks are made on q, the group/record payloads or
 * the field payloads.
 */
static void TESTDisplayQualCombo (QualComboPtr q)
{
  ValNodePtr vnp_t, vnp_b, vnp_q, vnp_f;
  UniqBioSrcGrpPtr g;
  UniqBioSourcePtr bio;
  Int4             val;

  for (vnp_t = q->group_list; vnp_t != NULL; vnp_t = vnp_t->next) {
    g = (UniqBioSrcGrpPtr) vnp_t->data.ptrvalue;
    for (vnp_b = g->biop_list; vnp_b != NULL; vnp_b = vnp_b->next) {
      bio = (UniqBioSourcePtr) vnp_b->data.ptrvalue;
      for (vnp_q = bio->available_fields; vnp_q != NULL; vnp_q = vnp_q->next) {
        vnp_f = (ValNodePtr) vnp_q->data.ptrvalue;
        /* value is read but never used afterwards */
        val = vnp_f->data.intvalue;
      }
    }
  }
}
17112 
17113 //Not used in Autodef or Cleanup
AddQualToQualCombo(QualComboPtr m,FieldTypePtr field)17114 static Boolean AddQualToQualCombo (
17115   QualComboPtr m,
17116   FieldTypePtr field
17117 )
17118 {
17119   Boolean    rval = FALSE;
17120 
17121   if (m == NULL || field == NULL) return rval;
17122 
17123 
17124   if (AddQualToUniqBioSrcGrpList (m->group_list, field)) {
17125     ValNodeLink (&(m->field_list), AsnIoMemCopy (field, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite));
17126     m->field_list = ValNodeSort (m->field_list, SortFieldTypeByImportance);
17127     m->num_mods ++;
17128     m->group_list = BioSrcGrpListSort (m->group_list);
17129     m->max_orgs_in_group = FindMaxOrgsInUniqBioSrcGrpList (m->group_list);
17130     m->num_unique_orgs = CountUniqueOrgsInUniqBioSrcGrpList (m->group_list);
17131     m->num_groups = ValNodeLen (m->group_list);
17132     rval = TRUE;
17133   }
17134   return rval;
17135 }
17136 
17137 //Not used in Autodef or Cleanup
QualComboListFree(ValNodePtr list)17138 static ValNodePtr LIBCALLBACK QualComboListFree (ValNodePtr list)
17139 {
17140   ValNodePtr list_next;
17141 
17142   while (list != NULL) {
17143     list_next = list->next;
17144     list->next = NULL;
17145     list->data.ptrvalue = QualComboFree (list->data.ptrvalue);
17146     list = ValNodeFree (list);
17147     list = list_next;
17148   }
17149   return list;
17150 }
17151 
17152 //Not used in Autodef or Cleanup
17153 /* NOTE - we want to sort groups from most unique organisms to least unique organisms */
17154 /* secondary sort - most groups to least groups */
17155 /* tertiary sort - fewer max orgs in group to most max orgs in group */
17156 /* fourth sort - least mods to most mods */
SortQualCombo(VoidPtr ptr1,VoidPtr ptr2)17157 static int LIBCALLBACK SortQualCombo (VoidPtr ptr1, VoidPtr ptr2)
17158 
17159 {
17160   ValNodePtr    vnp1, vnp2;
17161   QualComboPtr  g1, g2;
17162   FieldTypePtr  field1, field2;
17163   int           rval = 0;
17164 
17165   if (ptr1 != NULL && ptr2 != NULL) {
17166     vnp1 = *((ValNodePtr PNTR) ptr1);
17167     vnp2 = *((ValNodePtr PNTR) ptr2);
17168     if (vnp1 != NULL && vnp2 != NULL && vnp1->data.ptrvalue != NULL && vnp2->data.ptrvalue != NULL) {
17169       g1 = (QualComboPtr) vnp1->data.ptrvalue;
17170       g2 = (QualComboPtr) vnp2->data.ptrvalue;
17171       if (g1->num_unique_orgs > g2->num_unique_orgs) {
17172         rval = -1;
17173       } else if (g1->num_unique_orgs < g2->num_unique_orgs) {
17174         rval = 1;
17175       } else if (g1->num_groups > g2->num_groups) {
17176         rval = -1;
17177       } else if (g1->num_groups < g2->num_groups) {
17178         rval = 1;
17179       } else if (g1->max_orgs_in_group < g2->max_orgs_in_group) {
17180         rval = -1;
17181       } else if (g1->max_orgs_in_group > g2->max_orgs_in_group) {
17182         rval = 1;
17183       } else if (g1->num_mods < g2->num_mods) {
17184         rval = -1;
17185       } else if (g1->num_mods > g2->num_mods) {
17186         rval = 1;
17187       } else {
17188         /* compare modifiers */
17189         field1 = g1->field_list;
17190         field2 = g2->field_list;
17191         while (field1 != NULL && field2 != NULL
17192                && (rval = CompareFieldTypeByImportance (field1, field2)) == 0) {
17193           field1 = field1->next;
17194           field2 = field2->next;
17195         }
17196         if (rval == 0) {
17197           if (field1 == NULL && field2 != NULL) {
17198             rval = -1;
17199           } else if (field1 != NULL && field2 == NULL) {
17200             rval = 1;
17201           }
17202         }
17203       }
17204     }
17205   }
17206   return rval;
17207 }
17208 
17209 //Not used in Autodef or Cleanup
/* Sorts a list of QualCombo items with SortQualCombo, then passes it to
 * ValNodeUnique with the same comparison and QualComboListFree as the free
 * callback (presumably dropping items that compare equal to a neighbor --
 * confirm against ValNodeUnique).  Returns the resulting list head.
 */
static ValNodePtr QualComboListSort (ValNodePtr orig)
{
  orig = ValNodeSort (orig, SortQualCombo);
  ValNodeUnique (&orig, SortQualCombo, QualComboListFree);
  return orig;
}
17216 
17217 
17218 //Not used in Autodef or Cleanup
ExpandOneComboListUsingAllPresentQuals(QualComboPtr q)17219 static ValNodePtr ExpandOneComboListUsingAllPresentQuals (QualComboPtr q)
17220 {
17221   ValNodePtr new_list = NULL, vnp, vnp_m;
17222   FieldTypePtr match_list;
17223   UniqBioSrcGrpPtr g;
17224   QualComboPtr q_new;
17225   Boolean      found_group_improvement = FALSE;
17226 
17227   if (q == NULL) return NULL;
17228   for (vnp = q->group_list; vnp != NULL && !found_group_improvement; vnp = vnp->next) {
17229     g = (UniqBioSrcGrpPtr) vnp->data.ptrvalue;
17230     if (g->num_biop == 1) break;
17231     match_list = GetAllPresentQualsForGroup (g);
17232     for (vnp_m = match_list; vnp_m != NULL; vnp_m = vnp_m->next) {
17233       q_new = QualComboCopy (q);
17234       if (AddQualToQualCombo (q_new, vnp_m) && q_new->num_groups > q->num_groups) {
17235         ValNodeAddPointer (&new_list, 0, q_new);
17236         found_group_improvement = TRUE;
17237       } else {
17238         q_new = QualComboFree (q_new);
17239         RemoveFieldFromUniqBioSrcGrp (g, vnp_m);
17240       }
17241     }
17242     match_list = FieldTypeListFree (match_list);
17243   }
17244   return new_list;
17245 }
17246 
17247 //Not used in Autodef or Cleanup
/* Debugging aid: runs TESTDisplayQualCombo on every QualCombo payload in
 * new_list.
 */
static void TESTDisplayList (ValNodePtr new_list)
{
  ValNodePtr node = new_list;

  while (node != NULL) {
    TESTDisplayQualCombo ((QualComboPtr) node->data.ptrvalue);
    node = node->next;
  }
}
17259 
17260 //Not used in Autodef or Cleanup
IsQualOkForDefline(ValNodePtr vnp)17261 static Boolean IsQualOkForDefline (ValNodePtr vnp)
17262 {
17263   ValNodePtr scp;
17264 
17265   if (vnp == NULL || vnp->choice != FieldType_source_qual
17266       || (scp = (SourceQualChoicePtr) vnp->data.ptrvalue) == NULL
17267       || scp->choice != SourceQualChoice_textqual) {
17268     return FALSE;
17269   }
17270   if (scp->data.intvalue == Source_qual_map || scp->data.intvalue == Source_qual_nat_host) {
17271     return FALSE;
17272   } else {
17273     return TRUE;
17274   }
17275 }
17276 
17277 
17278 //Not used in Autodef or Cleanup
ExpandOneComboListUsingAnyPresentQuals(QualComboPtr q)17279 static ValNodePtr ExpandOneComboListUsingAnyPresentQuals (QualComboPtr q)
17280 {
17281   ValNodePtr new_list = NULL, vnp, vnp_m;
17282   FieldTypePtr match_list;
17283   UniqBioSrcGrpPtr g;
17284   QualComboPtr q_new;
17285   Boolean      found_group_improvement = FALSE;
17286 
17287   if (q == NULL) return NULL;
17288   for (vnp = q->group_list; vnp != NULL && !found_group_improvement; vnp = vnp->next) {
17289     g = (UniqBioSrcGrpPtr) vnp->data.ptrvalue;
17290     if (g->num_biop == 1) break;
17291     match_list = GetAllQualsForGroup (g);
17292     for (vnp_m = match_list; vnp_m != NULL; vnp_m = vnp_m->next) {
17293       if (!IsQualOkForDefline(vnp_m)) {
17294         RemoveFieldFromUniqBioSrcGrp (g, vnp_m);
17295       } else {
17296         q_new = QualComboCopy (q);
17297         if (AddQualToQualCombo (q_new, vnp_m) && q_new->num_groups > q->num_groups) {
17298           ValNodeAddPointer (&new_list, 0, q_new);
17299           found_group_improvement = TRUE;
17300         } else {
17301           q_new = QualComboFree (q_new);
17302           RemoveFieldFromUniqBioSrcGrp (g, vnp_m);
17303         }
17304       }
17305     }
17306     match_list = FieldTypeListFree (match_list);
17307   }
17308 
17309   return new_list;
17310 }
17311 
17312 //Not used in Autodef or Cleanup
ExpandComboList(ValNodePtr PNTR list)17313 static Boolean ExpandComboList (ValNodePtr PNTR list)
17314 {
17315   QualComboPtr  q;
17316   ValNodePtr    new_list, vnp, vnp_next, prev = NULL;
17317   Boolean       any_expansion = FALSE;
17318 
17319   if (*list == NULL) return FALSE;
17320   vnp = *list;
17321   while (vnp != NULL) {
17322     vnp_next = vnp->next;
17323     q = (QualComboPtr) vnp->data.ptrvalue;
17324     new_list = ExpandOneComboListUsingAnyPresentQuals (q);
17325 
17326     if (new_list == NULL) {
17327       prev = vnp;
17328     } else {
17329       if (prev == NULL) {
17330         *list = new_list;
17331       } else {
17332         prev->next = new_list;
17333       }
17334       prev = new_list;
17335       while (prev->next != NULL) {
17336         prev = prev->next;
17337       }
17338       ValNodeLink (&new_list, vnp->next);
17339       vnp->next = NULL;
17340       vnp = QualComboListFree (vnp);
17341       any_expansion = TRUE;
17342     }
17343     vnp = vnp_next;
17344   }
17345   return any_expansion;
17346 }
17347 
17348 //Not used in Autodef or Cleanup
BuildUniqBioSrcList(BioSourcePtr biop,Pointer userdata)17349 static void BuildUniqBioSrcList (
17350   BioSourcePtr biop,
17351   Pointer userdata
17352 )
17353 {
17354   UniqBioSourcePtr u;
17355   ValNodeBlockPtr  vnbp;
17356   ValNodePtr       vnp;
17357 
17358   u = UniqBioSourceNew (biop);
17359   vnbp = (ValNodeBlockPtr) userdata;
17360   vnp = ValNodeAddPointer (&vnbp->tail, 0, u);
17361   if (vnbp->head == NULL) {
17362     vnbp->head = vnp;
17363   }
17364   vnbp->tail = vnp;
17365 }
17366 
17367 //Not used in Autodef or Cleanup
17368 /* The function FindBestQualCombo tries to find the best combination of modifiers
17369  * to create unique organism descriptions.  This is accomplished by
17370  * creating a list of required modifiers, and then creating a list of
17371  * combinations of modifiers by adding modifiers one at a time
17372  * to see if the additional modifiers provide any more differentiation in
17373  * the list.
17374  * In order to do this, I start with a list of required modifiers, and
17375  * then create copies of this list.  For each copy I add one of the modifiers
17376  * that are present in the bio sources and not already on the list.
17377  * If adding the modifier increases the differentiation, I add that copy to
17378  * the list of possible combinations, otherwise I discard it.
17379  * The function then makes copies of all of the new items added to the list,
17380  * starting with the item pointed to by start_of_expand, and adds another
17381  * modifier to each combination, keeping the combinations that increase
17382  * the differentiation and discarding the rest.
17383  * This process continues until I have a combination that produces completely
17384  * differentiated bio sources, or I run out of possible combinations.
17385  * If the list of possible combinations is exhausted before each organism
17386  * has a unique description, the function selects the combination from the
17387  * list with the largest number of unique organism descriptions.  If more
17388  * than one combination produces the largest number of unique organisms,
17389  * the combination with the largest number of unique organisms and the
17390  * largest number of groups will be selected.
17391  */
FindBestQualComboEx(ValNodePtr PNTR biop_list,ModifierItemLocalPtr ItemList)17392 static QualComboPtr FindBestQualComboEx(ValNodePtr PNTR biop_list, ModifierItemLocalPtr ItemList)
17393 {
17394   QualComboPtr initial_combo = NULL, best_combo = NULL;
17395   ValNodePtr   group_list = NULL, combo_list = NULL;
17396   UniqBioSrcGrpPtr g;
17397   SourceQualChoice scd;
17398   FieldType ft;
17399   Int4      i, qual;
17400 
17401   if (biop_list == NULL || *biop_list == NULL) {
17402     return NULL;
17403   }
17404 
17405   /* sort organisms */
17406   *biop_list = UniqBioSourceListSort (*biop_list);
17407 
17408   /* create group list */
17409   g = UniqBioSrcGrpNew (*biop_list);
17410   ValNodeAddPointer (&group_list, 0, g);
17411 
17412   ReGroupUniqBioSrcGrpList (group_list);
17413   group_list = BioSrcGrpListSort (group_list);
17414 
17415   /* create combo with just the org groups */
17416   initial_combo = QualComboNew (group_list);
17417   group_list = UniqBioSrcGrpListFree (group_list);
17418   if (initial_combo == NULL) return NULL;
17419 
17420   /* add required quals */
17421   ft.choice = FieldType_source_qual;
17422   ft.data.ptrvalue = &scd;
17423   ft.next = NULL;
17424   scd.choice = SourceQualChoice_textqual;
17425   if (ItemList == NULL) {
17426     /* endogenous virus name */
17427     scd.data.intvalue = Source_qual_endogenous_virus_name;
17428     AddQualToQualCombo (initial_combo, &ft);
17429     /* plasmid name */
17430     scd.data.intvalue = Source_qual_plasmid_name;
17431     AddQualToQualCombo (initial_combo, &ft);
17432     /* transgenic */
17433     scd.data.intvalue = Source_qual_transgenic;
17434     AddQualToQualCombo (initial_combo, &ft);
17435   } else {
17436     for (i = 0; i < numDefLineModifiers; i++) {
17437       if (ItemList[i].required && ItemList[i].any_present) {
17438         qual = GetSrcQualFromSubSrcOrOrgMod (DefLineModifiers[i].subtype, DefLineModifiers[i].isOrgMod);
17439         if (qual > -1) {
17440           scd.data.intvalue = qual;
17441           AddQualToQualCombo (initial_combo, &ft);
17442         }
17443       }
17444     }
17445   }
17446 
17447   if (initial_combo->max_orgs_in_group == 1)
17448   {
17449     /* we're done - they're all unique */
17450     return initial_combo;
17451   }
17452 
17453   /* they're not unique yet.  Need to find a combination of modifiers that will make this as unique as possible */
17454   ValNodeAddPointer (&combo_list, 0, initial_combo);
17455   best_combo = initial_combo;
17456   while (ExpandComboList (&combo_list)) {
17457     /* sort after expansion */
17458     combo_list = QualComboListSort (combo_list);
17459     best_combo = combo_list->data.ptrvalue;
17460     if (best_combo->max_orgs_in_group == 1) {
17461       break;
17462     }
17463   }
17464   best_combo = QualComboCopy (best_combo);
17465   combo_list = QualComboListFree (combo_list);
17466   return best_combo;
17467 }
17468 
17469 //Not used in Autodef or Cleanup
FindBestQualCombo(SeqEntryPtr sep,ModifierItemLocalPtr ItemList)17470 static QualComboPtr FindBestQualCombo(SeqEntryPtr sep, ModifierItemLocalPtr ItemList)
17471 {
17472   QualComboPtr  best_combo;
17473   ValNodeBlock  vnb;
17474 
17475   /* first, get list of organisms */
17476   vnb.head = NULL;
17477   vnb.tail = NULL;
17478   VisitBioSourcesInSep (sep, &vnb, BuildUniqBioSrcList);
17479 
17480   best_combo = FindBestQualComboEx (&vnb.head, ItemList);
17481 
17482   UniqBioSourceListFree (vnb.head);
17483   return best_combo;
17484 }
17485 
17486 //Not used in Autodef or Cleanup
ModifierCombinationFromQualCombo(QualComboPtr q)17487 static ModifierCombinationPtr ModifierCombinationFromQualCombo (QualComboPtr q)
17488 {
17489   ModifierCombinationPtr m;
17490   FieldTypePtr           field;
17491   Int4                   i;
17492 
17493   if (q == NULL) {
17494     return NULL;
17495   }
17496 
17497   m = (ModifierCombinationPtr) MemNew (sizeof (ModifierCombinationData));
17498   m->num_groups = q->num_groups;
17499   m->num_mods = q->num_mods;
17500   m->max_orgs_in_group = q->max_orgs_in_group;
17501   m->num_unique_orgs = q->num_unique_orgs;
17502   m->next = NULL;
17503   m->group_list = NULL;
17504   m->modifier_indices = NULL;
17505   for (field = q->field_list; field != NULL; field = field->next) {
17506     i = GetDeflinePosForFieldType (field);
17507     if (i >= 0) {
17508       ValNodeAddInt (&(m->modifier_indices), 0, i);
17509     }
17510   }
17511   return m;
17512 }
17513 
17514 
17515 //Not used in Autodef or Cleanup
FindBestModifiersForDeflineClauseList(ValNodePtr defline_clauses,ModifierItemLocalPtr ItemList)17516 NLM_EXTERN ValNodePtr FindBestModifiersForDeflineClauseList (
17517   ValNodePtr defline_clauses,
17518   ModifierItemLocalPtr ItemList
17519 )
17520 
17521 {
17522   QualComboPtr best_combo;
17523   ValNodePtr   biop_list = NULL, vnp;
17524   DefLineFeatClausePtr df;
17525   SeqDescrPtr       sdp;
17526   SeqMgrDescContext context;
17527   UniqBioSourcePtr  u;
17528   ModifierCombinationPtr m;
17529   ValNodePtr modifier_indices = NULL;
17530 
17531   /* first, create list of organisms */
17532   for (vnp = defline_clauses; vnp != NULL; vnp = vnp->next) {
17533     df = (DefLineFeatClausePtr) vnp->data.ptrvalue;
17534     if (df != NULL) {
17535       sdp = SeqMgrGetNextDescriptor (df->bsp, NULL, Seq_descr_source, &context);
17536       if (sdp != NULL && sdp->data.ptrvalue != NULL) {
17537         u = UniqBioSourceNew (sdp->data.ptrvalue);
17538         ValNodeAddPointer (&(u->strings), 0, StringSave (df->clauselist));
17539         ValNodeAddPointer (&biop_list, 0, u);
17540       }
17541     }
17542   }
17543 
17544   best_combo = FindBestQualComboEx (&biop_list, ItemList);
17545 
17546   biop_list = UniqBioSourceListFree (biop_list);
17547   m = ModifierCombinationFromQualCombo (best_combo);
17548   if (m != NULL) {
17549     modifier_indices = CopyModifierIndices (m->modifier_indices);
17550     FreeModifierCombo (m);
17551   }
17552   return modifier_indices;
17553 }
17554 
17555 //Not used in Autodef or Cleanup
FindBestModifiersEx(SeqEntryPtr sep,ModifierItemLocalPtr ItemList,Boolean use_new)17556 NLM_EXTERN ValNodePtr FindBestModifiersEx(
17557   SeqEntryPtr sep,
17558   ModifierItemLocalPtr ItemList,
17559   Boolean use_new
17560 )
17561 
17562 {
17563   ModifierCombinationPtr m;
17564   QualComboPtr q;
17565   ValNodePtr modifier_indices = NULL;
17566 
17567   if (use_new) {
17568     q = FindBestQualCombo (sep, ItemList);
17569     m = ModifierCombinationFromQualCombo (q);
17570     q = QualComboFree (q);
17571   } else {
17572     m = FindBestCombo (sep, ItemList);
17573   }
17574   if (m != NULL) {
17575     modifier_indices = CopyModifierIndices (m->modifier_indices);
17576   }
17577   FreeModifierCombo (m);
17578   return modifier_indices;
17579 }
17580 
17581 //Not used in Autodef or Cleanup
FindBestModifiers(SeqEntryPtr sep,ModifierItemLocalPtr ItemList)17582 NLM_EXTERN ValNodePtr FindBestModifiers(
17583   SeqEntryPtr sep,
17584   ModifierItemLocalPtr ItemList
17585 )
17586 
17587 {
17588   return FindBestModifiersEx (sep, ItemList, FALSE);
17589 }
17590 
17591 //Not used in Autodef or Cleanup
17592 /* In this test function, we create a list of biosources with various combinations of modifiers,
17593  * and then calculate the best combination to use for the organism description.
17594  */
17595 static CharPtr strings1[] = {"a", "b", "c"};
17596 static CharPtr strings2[] = {"foo", "bar", "baz"};
17597 static CharPtr strings3[] = {"d", "e", "f"};
17598 
/* Test helper: prepends a new modifier with value val to biop.
 * qual is an index into DefLineModifiers; that entry decides whether an
 * OrgMod (pushed onto biop->org->orgname->mod) or a SubSource (pushed onto
 * biop->subtype) is created, and supplies the subtype.  val is copied.
 * biop->org and biop->org->orgname must already exist for OrgMod entries.
 */
static void SetBiopQual (BioSourcePtr biop, Int4 qual, CharPtr val)
{
  OrgModPtr    org_mod;
  SubSourcePtr sub_src;

  if (!DefLineModifiers[qual].isOrgMod) {
    sub_src = SubSourceNew ();
    sub_src->subtype = DefLineModifiers[qual].subtype;
    sub_src->name = StringSave (val);
    sub_src->next = biop->subtype;
    biop->subtype = sub_src;
    return;
  }

  org_mod = OrgModNew ();
  org_mod->subtype = DefLineModifiers[qual].subtype;
  org_mod->subname = StringSave (val);
  org_mod->next = biop->org->orgname->mod;
  biop->org->orgname->mod = org_mod;
}
17619 
17620 
/* Test helper: frees every SubSource and every OrgMod attached to biop and
 * stores the values returned by the free functions back into the lists.
 * biop->org and biop->org->orgname must exist.
 */
static void ClearBiopQuals (BioSourcePtr biop)
{
  /* the two lists are independent of each other */
  biop->subtype = SubSourceSetFree (biop->subtype);
  biop->org->orgname->mod = OrgModSetFree (biop->org->orgname->mod);
}
17626 
17627 
17628 //Not used in Autodef or Cleanup
PrintBiopQuals(BioSourcePtr biop,FILE * fp)17629 static void PrintBiopQuals (BioSourcePtr biop, FILE *fp)
17630 {
17631   OrgModPtr mod;
17632   SubSourcePtr ssp;
17633 
17634   fprintf (fp, "Taxname: %s", biop->org->taxname);
17635   for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
17636     fprintf (fp, "\tOrgMod%d:%s", mod->subtype, mod->subname);
17637   }
17638   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
17639     fprintf (fp, "\tSubSource%d:%s", ssp->subtype, ssp->name);
17640   }
17641   fprintf (fp, "\n");
17642 }
17643 
17644 //Not used in Autodef or Cleanup
PrintModifiers(ValNodePtr modifiers,FILE * fp)17645 static void PrintModifiers (ValNodePtr modifiers, FILE *fp)
17646 {
17647   ValNodePtr vnp;
17648 
17649   if (modifiers == NULL) {
17650     fprintf (fp, "\tNo combo");
17651   }
17652   for (vnp = modifiers; vnp != NULL; vnp = vnp->next) {
17653     fprintf (fp, "\t%s:%d", DefLineModifiers[vnp->data.intvalue].isOrgMod ? "OrgMod" : "SubSource",
17654                             DefLineModifiers[vnp->data.intvalue].subtype);
17655   }
17656   fprintf (fp, "\n");
17657 }
17658 
17659 
17660 //Not used in AUtodef or Cleanup
IsNonTextDeflineQual(Int4 srcqual)17661 static Boolean IsNonTextDeflineQual (Int4 srcqual)
17662 {
17663   if (srcqual == DEFLINE_POS_Transgenic)
17664   {
17665     return TRUE;
17666   }
17667   else
17668   {
17669     return FALSE;
17670   }
17671 }
17672 
17673 //Not used in AUtodef or Cleanup
CreateOneTest(FILE * fp,Int4 i,Int4 j,Int4 k,BioSourcePtr PNTR biops,Int4 num_biops,Boolean vary1,Boolean vary2,Boolean vary3)17674 static void CreateOneTest (FILE *fp, Int4 i, Int4 j, Int4 k, BioSourcePtr PNTR biops, Int4 num_biops, Boolean vary1, Boolean vary2, Boolean vary3)
17675 {
17676   Int4 n;
17677   ValNodePtr uniq_biop_list = NULL;
17678   QualComboPtr q;
17679   ModifierCombinationPtr m;
17680 
17681   for (n = 0; n < num_biops; n++) {
17682     if (i < numDefLineModifiers) {
17683       if (vary1) {
17684         SetBiopQual (biops[n], i, strings1[n]);
17685       } else {
17686         SetBiopQual (biops[n], i, strings1[0]);
17687       }
17688     }
17689     if (j < numDefLineModifiers) {
17690       if (vary2) {
17691         SetBiopQual (biops[n], j, strings2[n]);
17692       } else {
17693         SetBiopQual (biops[n], j, strings2[0]);
17694       }
17695     }
17696     if (k < numDefLineModifiers) {
17697       if (vary3) {
17698         SetBiopQual (biops[n], k, strings3[n]);
17699       } else {
17700         SetBiopQual (biops[n], k, strings3[0]);
17701       }
17702     }
17703     ValNodeAddPointer (&uniq_biop_list, 0, UniqBioSourceNew (biops[n]));
17704   }
17705   q = FindBestQualComboEx (&uniq_biop_list, NULL);
17706   m = ModifierCombinationFromQualCombo (q);
17707   /* print results */
17708   for (n = 0; n < num_biops; n++) {
17709     PrintBiopQuals (biops[n], fp);
17710   }
17711   PrintModifiers (m->modifier_indices, fp);
17712   q = QualComboFree (q);
17713   FreeModifierCombo (m);
17714   uniq_biop_list = UniqBioSourceListFree (uniq_biop_list);
17715 
17716   /* clear quals */
17717   for (n = 0; n < num_biops; n++) {
17718     ClearBiopQuals (biops[n]);
17719   }
17720 
17721 }
17722 
17723 
17724 //Not used in AUtodef or Cleanup
TestFindBestQualCombo(FILE * fp)17725 extern void TestFindBestQualCombo (FILE *fp)
17726 {
17727   BioSourcePtr biops[3];
17728   Int4         num_biops = 3;
17729   Int4         i, j, k;
17730 
17731   for (i = 0; i < num_biops; i++) {
17732     biops[i] = BioSourceNew ();
17733     biops[i]->org = OrgRefNew ();
17734     biops[i]->org->orgname = OrgNameNew ();
17735   }
17736 
17737   /* first try with all organism names the same */
17738   for (i = 0; i < num_biops; i++) {
17739     biops[i]->org->taxname = StringSave ("Homo sapiens");
17740   }
17741 
17742   for (i = 0; i <= numDefLineModifiers; i++) {
17743     if (IsNonTextDeflineQual (i)) {
17744       continue;
17745     }
17746     for (j = i + 1; j <= numDefLineModifiers; j++) {
17747       if (IsNonTextDeflineQual (j)) {
17748         continue;
17749       }
17750       for (k = j + 1; k <= numDefLineModifiers; k++) {
17751         if (IsNonTextDeflineQual (k)) {
17752           continue;
17753         }
17754         if (k != numDefLineModifiers) {
17755           /* try all the same but 1*/
17756           CreateOneTest (fp, i, j, k, biops, num_biops, FALSE, FALSE, TRUE);
17757         }
17758         if (j != numDefLineModifiers || k != numDefLineModifiers) {
17759           /* try 2 different 1 same*/
17760           CreateOneTest (fp, i, j, k, biops, num_biops, FALSE, TRUE, TRUE);
17761         }
17762         if (i != numDefLineModifiers || j != numDefLineModifiers || k != numDefLineModifiers) {
17763           /* try all different */
17764           CreateOneTest (fp, i, j, k, biops, num_biops, TRUE, TRUE, TRUE);
17765         }
17766       }
17767     }
17768   }
17769   for (i = 0; i < num_biops; i++) {
17770     biops[i] = BioSourceFree (biops[i]);
17771   }
17772 
17773 }
17774 
17775 
17776 
/* collection_date has a controlled format.
 * It is YYYY or Mmm-YYYY or DD-Mmm-YYYY where Mmm = Jan, Feb, Mar, Apr, May,
 *                                                   Jun, Jul, Aug, Sep, Oct,
 *                                                   Nov, Dec
 * The date-reformatting functions below convert other formats to this format.
 * For instance, "September 12, 2004" should be converted to 12-Sep-2004 and
 * "12/15/2003" should be converted to 15-Dec-2003.
 *
 * If the date supplied is ambiguous (e.g. 01/03/05), the caller chooses which
 * field goes in Mmm and which in DD through the month_first argument, and the
 * ambiguity is reported back through month_ambiguous.
 */
17787 //Not used in Autodef or Cleanup
ReadNumberFromToken(CharPtr token,Int4 token_len)17788 NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len)
17789 {
17790   Int4 val = 0;
17791 
17792   if (token == NULL || !isdigit (*token))
17793   {
17794     return val;
17795   }
17796   while (token_len > 0)
17797   {
17798     val *= 10;
17799     val += *token - '0';
17800     token++;
17801     token_len--;
17802   }
17803 
17804   return val;
17805 }
17806 
17807 //Not used in Autodef or Cleanup
GetYearFromNumber(Int4 year)17808 static Int4 GetYearFromNumber(Int4 year)
17809 {
17810     Nlm_DayTime dt;
17811 
17812   if (year < 1000)
17813   {
17814     GetDayTime (&dt);
17815     if (year + 2000 > dt.tm_year + 1901)
17816     {
17817       year += 1900;
17818     }
17819     else
17820     {
17821       year += 2000;
17822     }
17823   }
17824   return year;
17825 }
17826 
17827 //Not used in Autodef or Cleanup
GetYearFromToken(CharPtr token,Int4 token_len)17828 NLM_EXTERN Int4 GetYearFromToken (CharPtr token, Int4 token_len)
17829 {
17830   Int4        year = 0;
17831 
17832   if (token == NULL || token_len == 0 || token_len > 4 || token_len == 3)
17833   {
17834     return 0;
17835   }
17836 
17837   year = GetYearFromNumber(ReadNumberFromToken (token, token_len));
17838 
17839   return year;
17840 }
17841 
17842 static CharPtr month_abbrevs [12] =
17843 {
17844   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
17845   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
17846 };
17847 
17848 //Not used in Autodef or Cleanup
GetMonthAbbrev(Int4 n)17849 NLM_EXTERN CharPtr GetMonthAbbrev (Int4 n)
17850 {
17851   if (n > 0 && n <= 12) {
17852     return month_abbrevs[n - 1];
17853   } else {
17854     return NULL;
17855   }
17856 }
17857 
17858 
17859 static Int4 days_in_month [12] =
17860 {
17861   31, 29, 31, 30, 31, 30,
17862   31, 31, 30, 31, 30, 31
17863 };
17864 
17865 //Not used in Autodef or Cleanup
GetDaysInMonth(Int4 n)17866 NLM_EXTERN Int4 GetDaysInMonth (Int4 n)
17867 {
17868   if (n > 0 && n <= 12) {
17869     return days_in_month[n - 1];
17870   } else {
17871     return 0;
17872   }
17873 }
17874 
17875 //Not used in Autodef or Cleanup
GetMonthNumFromAbbrev(CharPtr month_abbrev)17876 NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev)
17877 {
17878   Int4 i;
17879 
17880   for (i = 0; i < 12; i++) {
17881     if (StringICmp (month_abbrev, month_abbrevs[i]) == 0) {
17882       return i;
17883     }
17884   }
17885   return -1;
17886 }
17887 
17888 //Not used in Autodef or Cleanup
GetDaysInMonthByName(CharPtr month)17889 static Int4 GetDaysInMonthByName (CharPtr month)
17890 {
17891   Int4 month_num;
17892 
17893   for (month_num = 0; month_num < 12; month_num++)
17894   {
17895     if (StringCmp (month, month_abbrevs [month_num]) == 0)
17896     {
17897       return days_in_month [month_num];
17898     }
17899   }
17900   return 0;
17901 }
17902 
17903 //Not used in Autodef or Cleanup
GetMonthFromToken(CharPtr token,Int4 token_len)17904 NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len)
17905 {
17906   Int4    month_num;
17907 
17908   if (token == NULL || token_len == 0)
17909   {
17910     return NULL;
17911   }
17912 
17913   if (isdigit (*token))
17914   {
17915     if (token_len > 2)
17916     {
17917       return NULL;
17918     }
17919     else
17920     {
17921       month_num = ReadNumberFromToken (token, token_len);
17922       if (month_num == 0 || month_num > 12)
17923       {
17924         return NULL;
17925       }
17926       else
17927       {
17928         return month_abbrevs [month_num - 1];
17929       }
17930     }
17931   }
17932   else
17933   {
17934     for (month_num = 0; month_num < 12; month_num++)
17935     {
17936       if (StringNICmp (token, month_abbrevs[month_num], 3) == 0)
17937       {
17938         return month_abbrevs[month_num];
17939       }
17940     }
17941     return NULL;
17942   }
17943 }
17944 
17945 //Not used in Autodef or Cleanup
17946 static Boolean
ChooseDayAndYear(Int4 num_1,Int4 num_1_len,Int4 num_2,Int4 num_2_len,CharPtr month,Boolean year_first,Int4Ptr day,Int4Ptr year)17947 ChooseDayAndYear
17948 (Int4    num_1,
17949  Int4    num_1_len,
17950  Int4    num_2,
17951  Int4    num_2_len,
17952  CharPtr month,
17953  Boolean year_first,
17954  Int4Ptr day,
17955  Int4Ptr year)
17956 {
17957   if (day == NULL || year == NULL)
17958   {
17959     return FALSE;
17960   }
17961 
17962   if (num_1 == 0 && num_2 == 0)
17963   {
17964     return FALSE;
17965   }
17966   else if (num_1 == 0 && num_1_len == 2)
17967   {
17968     *year = 2000;
17969     *day = num_2;
17970   }
17971   else if (num_2 == 0 && num_2_len == 2)
17972   {
17973     *year = 2000;
17974     *day = num_1;
17975   }
17976   else if (num_1 == 0 || num_2 == 0)
17977   {
17978     return FALSE;
17979   }
17980   else if (num_1 > GetDaysInMonthByName (month) && (num_2_len == 2 || num_2_len == 4))
17981   {
17982     if (num_2 > GetDaysInMonthByName (month))
17983     {
17984       return FALSE;
17985     }
17986     *year = num_1;
17987     *day = num_2;
17988   }
17989   else if (num_2 > GetDaysInMonthByName (month) && (num_1_len == 2 || num_1_len == 4))
17990   {
17991     *year = num_2;
17992     *day = num_1;
17993   }
17994   else if (year_first && (num_1_len == 2 || num_1_len == 4))
17995   {
17996     *year = num_1;
17997     *day = num_2;
17998   }
17999   else if (num_2_len == 2 || num_2_len == 4)
18000   {
18001     *year = num_2;
18002     *day = num_1;
18003   }
18004   else
18005   {
18006     return FALSE;
18007   }
18008 
18009   return TRUE;
18010 }
18011 
18012 //Not used in Autodef or Cleanup
18013 static Boolean
ChooseMonthAndYear(Int4 num_1,Int4 num_2,Boolean month_first,CharPtr PNTR month,Int4Ptr year,BoolPtr month_ambiguous)18014 ChooseMonthAndYear
18015 (Int4    num_1,
18016  Int4    num_2,
18017  Boolean month_first,
18018  CharPtr PNTR month,
18019  Int4Ptr year,
18020  BoolPtr month_ambiguous)
18021 {
18022   if (year == NULL || month == NULL
18023       || (num_1 == 0 && num_2 == 0)
18024       || (num_1 > 12 && num_2 > 12)
18025       || (num_1 == 0 && num_2 > 12)
18026       || (num_2 == 0 && num_1 > 12))
18027   {
18028     return FALSE;
18029   }
18030 
18031   if (num_1 == 0)
18032   {
18033     *year = 2000;
18034     *month = month_abbrevs[num_2 - 1];
18035   }
18036   else if (num_2 == 0)
18037   {
18038     *year = 2000;
18039     *month = month_abbrevs[num_1 - 1];
18040   }
18041   else if (num_1 > 12)
18042   {
18043     *year = GetYearFromNumber(num_1);
18044     *month = month_abbrevs [num_2 - 1];
18045   }
18046   else if (num_2 > 12)
18047   {
18048     *year = GetYearFromNumber(num_2);
18049     *month = month_abbrevs [num_1 - 1];
18050   }
18051   else if (month_first)
18052   {
18053     if (month_ambiguous != NULL)
18054     {
18055       *month_ambiguous = TRUE;
18056     }
18057     *year = GetYearFromNumber(num_2);
18058     *month = month_abbrevs [num_1 - 1];
18059   }
18060   else
18061   {
18062     if (month_ambiguous != NULL)
18063     {
18064       *month_ambiguous = TRUE;
18065     }
18066     *year = GetYearFromNumber(num_1);
18067     *month = month_abbrevs [num_2 - 1];
18068   }
18069   return TRUE;
18070 }
18071 
18072 
18073 //Not used in Autodef or Cleanup
ChooseMonthAndDay(Int4 num_1,Int4 num_2,Boolean month_first,CharPtr PNTR month,Int4Ptr day,BoolPtr month_ambiguous)18074 static Boolean ChooseMonthAndDay
18075 (Int4    num_1,
18076  Int4    num_2,
18077  Boolean month_first,
18078  CharPtr PNTR month,
18079  Int4Ptr day,
18080  BoolPtr month_ambiguous)
18081 {
18082   if (day == NULL || month == NULL || num_1 == 0 || num_2 == 0
18083       || (num_1 > 12 && num_2 > 12))
18084   {
18085     return FALSE;
18086   }
18087 
18088   if (num_1 > 12)
18089   {
18090     *day = num_1;
18091     *month = month_abbrevs [num_2 - 1];
18092   }
18093   else if (num_2 > 12)
18094   {
18095     *day = num_2;
18096     *month = month_abbrevs [num_1 - 1];
18097   }
18098   else if (month_first)
18099   {
18100     if (month_ambiguous != NULL)
18101     {
18102       *month_ambiguous = TRUE;
18103     }
18104     *day = num_2;
18105     *month = month_abbrevs [num_1 - 1];
18106   }
18107   else
18108   {
18109     if (month_ambiguous != NULL)
18110     {
18111       *month_ambiguous = TRUE;
18112     }
18113     *day = num_1;
18114     *month = month_abbrevs [num_2 - 1];
18115   }
18116   return TRUE;
18117 }
18118 
18119 //Not used in Cleanup or Autodef
ReformatDateStringEx(CharPtr orig_date,Boolean month_first,BoolPtr month_ambiguous)18120 NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous)
18121 {
18122   CharPtr reformatted_date = NULL, cp;
18123   Int4    year = 0, day = 0;
18124   CharPtr month = NULL;
18125   CharPtr token_list[3];
18126   Int4    token_lens[3];
18127   CharPtr numbers = "0123456789";
18128   CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
18129   Int4    num_tokens = 0;
18130   Int4    token_len;
18131   Int4    month_token = -1;
18132   Boolean is_num;
18133   Int4    num_1, num_2, num_3;
18134 
18135   if (StringHasNoText (orig_date))
18136   {
18137     return NULL;
18138   }
18139   if (month_ambiguous != NULL) {
18140     *month_ambiguous = FALSE;
18141   }
18142 
18143   /* divide our original date into tokens */
18144   /* skip over any leading spaces */
18145   cp = orig_date;
18146   while (*cp != 0 && num_tokens < 3)
18147   {
18148     is_num = FALSE;
18149     token_len = StringSpn (cp, numbers);
18150     if (token_len == 0)
18151     {
18152       token_len = StringSpn (cp, letters);
18153     }
18154     else
18155     {
18156       is_num = TRUE;
18157     }
18158     if (token_len == 0)
18159     {
18160       cp++;
18161     }
18162     else
18163     {
18164       if (!is_num)
18165       {
18166         if (month_token == -1)
18167         {
18168           month_token = num_tokens;
18169         }
18170         else
18171         {
18172           /* already found a month string */
18173           return NULL;
18174         }
18175       }
18176       token_list [num_tokens] = cp;
18177       token_lens [num_tokens] = token_len;
18178       num_tokens ++;
18179       cp += token_len;
18180     }
18181   }
18182 
18183   if (num_tokens == 0 || *cp != 0)
18184   {
18185     return NULL;
18186   }
18187 
18188   if (num_tokens == 1)
18189   {
18190     if (month_token == 0)
18191     {
18192       return NULL;
18193     }
18194     year = GetYearFromToken (token_list [0], token_lens [0]);
18195   }
18196   else if (num_tokens == 2)
18197   {
18198     if (month_token == 0)
18199     {
18200       month = GetMonthFromToken (token_list [0], token_lens [0]);
18201       year = GetYearFromToken (token_list [1], token_lens [1]);
18202     }
18203     else if (month_token == 1)
18204     {
18205       month = GetMonthFromToken (token_list [1], token_lens [1]);
18206       year = GetYearFromToken (token_list [0], token_lens [0]);
18207     }
18208     else
18209     {
18210       num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
18211       num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
18212       if (! ChooseMonthAndYear (num_1, num_2, month_first, &month, &year, month_ambiguous))
18213       {
18214         return NULL;
18215       }
18216     }
18217   }
18218   else if (num_tokens == 3)
18219   {
18220     if (month_token == 0)
18221     {
18222       month = GetMonthFromToken (token_list [0], token_lens [0]);
18223       num_1 = ReadNumberFromToken (token_list [1], token_lens [1]);
18224       num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
18225       if (!ChooseDayAndYear (num_1, token_lens[1], num_2, token_lens[2], month, FALSE, &day, &year))
18226       {
18227         return NULL;
18228       }
18229     }
18230     else if (month_token == 1)
18231     {
18232       month = GetMonthFromToken (token_list [1], token_lens [1]);
18233       num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
18234       num_2 = ReadNumberFromToken (token_list [2], token_lens [2]);
18235       if (!ChooseDayAndYear (num_1, token_lens[0], num_2, token_lens[2], month, FALSE, &day, &year))
18236       {
18237         return NULL;
18238       }
18239     }
18240     else if (month_token == 2)
18241     {
18242       month = GetMonthFromToken (token_list [2], token_lens [2]);
18243       num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
18244       num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
18245       if (!ChooseDayAndYear (num_1, token_lens[0], num_2, token_lens[1], month, FALSE, &day, &year))
18246       {
18247         return NULL;
18248       }
18249     }
18250     else
18251     {
18252       num_1 = ReadNumberFromToken (token_list [0], token_lens [0]);
18253       num_2 = ReadNumberFromToken (token_list [1], token_lens [1]);
18254       num_3 = ReadNumberFromToken (token_list [2], token_lens [2]);
18255 
18256       if (num_1 > 31 || num_1 == 0)
18257       {
18258         year = num_1;
18259         if (! ChooseMonthAndDay (num_2, num_3, month_first, &month, &day, month_ambiguous))
18260         {
18261           return NULL;
18262         }
18263       }
18264       else if (num_2 > 31 || num_2 == 0)
18265       {
18266         year = num_2;
18267         if (! ChooseMonthAndDay (num_1, num_3, month_first, &month, &day, month_ambiguous))
18268         {
18269           return NULL;
18270         }
18271       }
18272       else if (num_3 > 31 || num_3 == 0)
18273       {
18274         year = num_3;
18275         if (! ChooseMonthAndDay (num_1, num_2, month_first, &month, &day, month_ambiguous))
18276         {
18277           return NULL;
18278         }
18279       }
18280       else if (num_1 > 0 && num_1 < 13 && num_2 > days_in_month [num_1] && num_3 <= days_in_month [num_1])
18281       {
18282         month = month_abbrevs [num_1 - 1];
18283         year = num_2;
18284         day = num_3;
18285       }
18286       else if (num_1 > 0 && num_1 < 13 && num_3 > days_in_month [num_1] && num_2 <= days_in_month [num_1])
18287       {
18288         month = month_abbrevs [num_1 - 1];
18289         year = num_3;
18290         day = num_2;
18291       }
18292       else if (num_2 > 0 && num_2 < 13 && num_1 > days_in_month [num_2] && num_3 <= days_in_month [num_1])
18293       {
18294         month = month_abbrevs [num_2 - 1];
18295         year = num_1;
18296         day = num_3;
18297       }
18298       else if (num_2 > 0 && num_2 < 13 && num_3 > days_in_month [num_2] && num_1 <= days_in_month [num_1])
18299       {
18300         month = month_abbrevs [num_2 - 1];
18301         year = num_3;
18302         day = num_1;
18303       }
18304       else if (num_3 > 0 && num_3 < 13 && num_1 > days_in_month [num_3] && num_2 <= days_in_month [num_1])
18305       {
18306         month = month_abbrevs [num_3 - 1];
18307         year = num_1;
18308         day = num_2;
18309       }
18310       else if (num_3 > 0 && num_3 < 13 && num_2 > days_in_month [num_3] && num_1 <= days_in_month [num_1])
18311       {
18312         month = month_abbrevs [num_3 - 1];
18313         year = num_2;
18314         day = num_1;
18315       }
18316       else
18317       {
18318         year = num_3;
18319         if (! ChooseMonthAndDay (num_1, num_2, month_first, &month, &day, month_ambiguous))
18320         {
18321           year = num_1;
18322           if (!ChooseMonthAndDay (num_2, num_3, month_first, &month, &day, month_ambiguous))
18323           {
18324             return NULL;
18325           }
18326         }
18327       }
18328 
18329     }
18330     year = GetYearFromNumber(year);
18331   }
18332 
18333   if (month == NULL && (day > 0 || num_tokens > 1))
18334   {
18335     return NULL;
18336   }
18337   if (day < 1 && num_tokens > 2)
18338   {
18339     return NULL;
18340   }
18341 
18342   reformatted_date = (CharPtr) MemNew (sizeof (Char) * 12);
18343   if (reformatted_date == NULL)
18344   {
18345     return NULL;
18346   }
18347 
18348   if (month == NULL)
18349   {
18350     sprintf (reformatted_date, "%d", year);
18351   }
18352   else if (day == 0)
18353   {
18354     sprintf (reformatted_date, "%s-%d", month, year);
18355   }
18356   else
18357   {
18358     sprintf (reformatted_date, "%02d-%s-%d", day, month, year);
18359   }
18360   return reformatted_date;
18361 }
18362 
18363 
18364 //Not used in Autodef or Cleanup
ReformatAssemblyDate(CharPtr PNTR orig_date)18365 NLM_EXTERN Boolean ReformatAssemblyDate (CharPtr PNTR orig_date)
18366 {
18367   CharPtr collection_date;
18368   CharPtr assembly_date;
18369   Boolean ambiguous = FALSE;
18370 
18371   if (orig_date == NULL || StringHasNoText (*orig_date)) {
18372     return FALSE;
18373   }
18374 
18375   collection_date = ReformatDateStringEx(*orig_date, TRUE, &ambiguous);
18376   if (StringHasNoText(collection_date)) {
18377     collection_date = MemFree (collection_date);
18378     return FALSE;
18379   }
18380 
18381 
18382   assembly_date = AssemblyDateFromCollectionDate (collection_date, ambiguous);
18383   collection_date = MemFree (collection_date);
18384   if (!StringHasNoText (assembly_date)) {
18385     *orig_date = MemFree (*orig_date);
18386     *orig_date = assembly_date;
18387     return TRUE;
18388   }
18389 
18390   return FALSE;
18391 }
18392 
18393 
18394 //Not used in Autodef or Cleanup
ReformatDateWithMonthNames(CharPtr orig_date)18395 NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date)
18396 {
18397   CharPtr reformatted_date = NULL, cp;
18398   Int4    year = 0, day = 0;
18399   CharPtr month = NULL;
18400   CharPtr token_list[3];
18401   Int4    token_lens[3];
18402   CharPtr numbers = "0123456789";
18403   CharPtr letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
18404   Int4    num_tokens = 0;
18405   Int4    token_len;
18406   Int4    month_token = -1;
18407   Boolean is_num;
18408   Int4    nums[2];
18409   Int4    num_lens[2];
18410   Int4    i, nums_pos;
18411 
18412   if (StringHasNoText (orig_date))
18413   {
18414     return NULL;
18415   }
18416 
18417   /* divide our original date into tokens */
18418   /* skip over any leading spaces */
18419   cp = orig_date;
18420   while (*cp != 0 && num_tokens < 3)
18421   {
18422     is_num = FALSE;
18423     token_len = StringSpn (cp, numbers);
18424     if (token_len == 0)
18425     {
18426       token_len = StringSpn (cp, letters);
18427     }
18428     else
18429     {
18430       is_num = TRUE;
18431     }
18432     if (token_len == 0)
18433     {
18434       cp++;
18435     }
18436     else
18437     {
18438       if (!is_num)
18439       {
18440         if (month_token == -1)
18441         {
18442           month_token = num_tokens;
18443         }
18444         else
18445         {
18446           /* already found a month string */
18447           return NULL;
18448         }
18449       }
18450       token_list [num_tokens] = cp;
18451       token_lens [num_tokens] = token_len;
18452       num_tokens ++;
18453       cp += token_len;
18454     }
18455   }
18456 
18457   if (num_tokens == 0 || *cp != 0 || num_tokens < 2)
18458   {
18459     return NULL;
18460   }
18461   if (month_token == -1) {
18462     /* tokens are all numbers */
18463     /* if only one number can be mapped to a month, that is the month token */
18464     for (i = 0; i < num_tokens; i++) {
18465       if (GetMonthFromToken (token_list [i], token_lens [i]) != NULL) {
18466         if (month_token == -1) {
18467           month_token = i;
18468         } else {
18469           return NULL;
18470         }
18471       }
18472     }
18473   }
18474   if (month_token == -1) {
18475     /* ambiguous */
18476     return NULL;
18477   }
18478 
18479   if (num_tokens == 2)
18480   {
18481     if (month_token == 0)
18482     {
18483       month = GetMonthFromToken (token_list [0], token_lens [0]);
18484       year = GetYearFromToken (token_list [1], token_lens [1]);
18485     }
18486     else if (month_token == 1)
18487     {
18488       month = GetMonthFromToken (token_list [1], token_lens [1]);
18489       year = GetYearFromToken (token_list [0], token_lens [0]);
18490     }
18491     else
18492     {
18493       return NULL;
18494     }
18495   }
18496   else if (num_tokens == 3)
18497   {
18498     if (month_token < 0 || month_token > 2)
18499     {
18500       return NULL;
18501     }
18502     nums_pos = 0;
18503     for (i = 0; i < 3; i++) {
18504       if (i == month_token) {
18505         month = GetMonthFromToken (token_list[i], token_lens[i]);
18506       } else if (token_lens[i] == 3) {
18507         return NULL;
18508       } else {
18509         nums[nums_pos] = ReadNumberFromToken(token_list[i], token_lens[i]);
18510         num_lens[nums_pos] = token_lens[i];
18511         nums_pos++;
18512       }
18513     }
18514 
18515     if (!ChooseDayAndYear (nums[0], num_lens[0], nums[1], num_lens[1], month, FALSE, &day, &year))
18516     {
18517       return NULL;
18518     }
18519 
18520     year = GetYearFromNumber(year);
18521   }
18522 
18523   if (month == NULL && (day > 0 || num_tokens > 1))
18524   {
18525     return NULL;
18526   }
18527   if (day < 1 && num_tokens > 2)
18528   {
18529     return NULL;
18530   }
18531 
18532   reformatted_date = (CharPtr) MemNew (sizeof (Char) * 12);
18533   if (reformatted_date == NULL)
18534   {
18535     return NULL;
18536   }
18537 
18538   if (month == NULL)
18539   {
18540     sprintf (reformatted_date, "%d", year);
18541   }
18542   else if (day == 0)
18543   {
18544     sprintf (reformatted_date, "%s-%d", month, year);
18545   }
18546   else
18547   {
18548     sprintf (reformatted_date, "%02d-%s-%d", day, month, year);
18549   }
18550   return reformatted_date;
18551 }
18552 
18553 //Not used in Autodef or Cleanup
CreateMatPeptideFromCDS(SeqFeatPtr sfp)18554 NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp)
18555 {
18556   SeqFeatPtr        orig_prot, new_prot;
18557   SeqMgrFeatContext fcontext;
18558   SeqLocPtr         prot_loc;
18559   ProtRefPtr        prp;
18560   BioseqPtr         prot_bsp;
18561 
18562   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
18563     return FALSE;
18564   }
18565 
18566   prot_bsp = BioseqFindFromSeqLoc (sfp->product);
18567   if (prot_bsp != NULL) {
18568     prot_loc = SeqLocIntNew (0, prot_bsp->length - 1, Seq_strand_plus, SeqLocId (sfp->product));
18569     new_prot = CreateNewFeatureOnBioseq (prot_bsp, SEQFEAT_PROT, prot_loc);
18570     orig_prot = SeqMgrGetNextFeature (prot_bsp, NULL, 0, FEATDEF_PROT, &fcontext);
18571     if (orig_prot != NULL) {
18572       prp = AsnIoMemCopy (orig_prot->data.value.ptrvalue, (AsnReadFunc) ProtRefAsnRead, (AsnWriteFunc) ProtRefAsnWrite);
18573     } else {
18574       prp = ProtRefNew ();
18575     }
18576     prp->processed = 2;
18577     new_prot->data.value.ptrvalue = prp;
18578     return TRUE;
18579   } else {
18580     return FALSE;
18581   }
18582 }
18583 
18584 
18585 //Not used in Autodef or Cleanup
ConvertCDSToMatPeptideForOverlappingCDS(SeqFeatPtr sfp,SeqFeatPtr top_cds,Boolean remove_original)18586 NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqFeatPtr top_cds, Boolean remove_original)
18587 {
18588   BioseqPtr prot_bsp;
18589   CdRegionPtr crp;
18590   SeqLocPtr   prot_loc;
18591   SeqFeatPtr  new_sfp, orig_prot;
18592   SeqMgrFeatContext prot_context;
18593   ProtRefPtr prp;
18594   Int4 frame;
18595   Boolean rval = FALSE;
18596   Boolean partial5, partial3;
18597 
18598   if (sfp == NULL || top_cds == NULL || sfp->data.choice != SEQFEAT_CDREGION || top_cds->data.choice != SEQFEAT_CDREGION) {
18599     return FALSE;
18600   }
18601 
18602   prot_bsp = BioseqFindFromSeqLoc (top_cds->product);
18603   if (prot_bsp != NULL)
18604   {
18605     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
18606 
18607     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
18608     prot_loc = dnaLoc_to_aaLoc(top_cds, sfp->location, TRUE, &frame, !partial3);
18609     if (prot_loc != NULL)
18610     {
18611       /* Create new feature on prot_bsp */
18612       new_sfp = CreateNewFeatureOnBioseq (prot_bsp, SEQFEAT_PROT, prot_loc);
18613       if (new_sfp != NULL)
18614       {
18615         prot_bsp = BioseqFindFromSeqLoc (sfp->product);
18616         orig_prot = SeqMgrGetNextFeature (prot_bsp, NULL, 0, FEATDEF_PROT, &prot_context);
18617         if (orig_prot == NULL) {
18618           prp = ProtRefNew ();
18619         } else {
18620           prp = AsnIoMemCopy (orig_prot->data.value.ptrvalue, (AsnReadFunc) ProtRefAsnRead, (AsnWriteFunc) ProtRefAsnWrite);
18621         }
18622         prp->processed = 2;
18623         new_sfp->data.value.ptrvalue = prp;
18624         prot_bsp->idx.deleteme = TRUE;
18625 
18626         rval = TRUE;
18627       }
18628       /* mark old feature for deletion */
18629       sfp->idx.deleteme = TRUE;
18630     }
18631   }
18632   return rval;
18633 }
18634 
18635 
18636 //Not used in Autodef or Cleanup
AutoConvertCDSToMiscFeat(SeqFeatPtr cds,Boolean remove_original)18637 NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_original)
18638 {
18639   BioseqPtr bsp;
18640   SeqFeatPtr top_cds = NULL;
18641   SeqMgrFeatContext fcontext;
18642   Boolean rval = FALSE;
18643 
18644   if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
18645     return FALSE;
18646   }
18647 
18648   bsp = BioseqFindFromSeqLoc (cds->location);
18649 
18650   /* find overlapping coding region */
18651   for (top_cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
18652        top_cds != NULL && (top_cds == cds || SeqLocCompare (top_cds->location, cds->location) != SLC_B_IN_A);
18653        top_cds = SeqMgrGetNextFeature (bsp, top_cds, SEQFEAT_CDREGION, 0, &fcontext)) {
18654   }
18655 
18656   if (top_cds == NULL) {
18657     if (remove_original) {
18658       rval = FALSE;
18659     } else {
18660       rval = CreateMatPeptideFromCDS (cds);
18661     }
18662   } else {
18663     rval = ConvertCDSToMatPeptideForOverlappingCDS (cds, top_cds, remove_original);
18664   }
18665   if (rval) {
18666     /* have to remove CDS because ConvertFeature has already created a duplicate if the feature is going to be kept */
18667     cds->idx.deleteme = TRUE;
18668   }
18669   return rval;
18670 }
18671 
18672 //Not part of Autodef or Cleanup
GetBestSeqEntryForItem(ValNodePtr vnp)18673 NLM_EXTERN SeqEntryPtr GetBestSeqEntryForItem (ValNodePtr vnp)
18674 {
18675   SeqFeatPtr       sfp;
18676   SeqDescrPtr      sdp;
18677   ObjValNodePtr    ovp;
18678   SeqEntryPtr      sep = NULL;
18679   BioseqPtr        bsp;
18680 
18681   if (vnp == NULL || vnp->data.ptrvalue == NULL) return NULL;
18682 
18683   if (vnp->choice == OBJ_SEQFEAT) {
18684     sfp = vnp->data.ptrvalue;
18685     sep = GetBestTopParentForData (sfp->idx.entityID, BioseqFindFromSeqLoc (sfp->location));
18686   } else if (vnp->choice == OBJ_SEQDESC) {
18687     sdp = vnp->data.ptrvalue;
18688     if (sdp->extended != 0) {
18689       ovp = (ObjValNodePtr) sdp;
18690       if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
18691         sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr);
18692       } else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
18693         sep = GetBestTopParentForData (ovp->idx.entityID, ovp->idx.parentptr);
18694       }
18695     }
18696   } else if (vnp->choice == OBJ_BIOSEQ) {
18697     bsp = (BioseqPtr) vnp->data.ptrvalue;
18698     sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
18699   } else if (vnp->choice == OBJ_SEQENTRY) {
18700     sep = vnp->data.ptrvalue;
18701   }
18702   return sep;
18703 }
18704 
18705 
18706 //Not part of Autodef or Cleanup
IsDescriptorInList(SeqDescPtr sdp,SeqDescPtr list)18707 static Boolean IsDescriptorInList(SeqDescPtr sdp, SeqDescPtr list)
18708 {
18709   Boolean found_match = FALSE;
18710   SeqDescPtr sdp_tmp, sdp_tmp_next;
18711   for (sdp_tmp = list, found_match = FALSE;
18712        sdp_tmp != NULL && !found_match;
18713        sdp_tmp = sdp_tmp->next) {
18714     sdp_tmp_next = sdp_tmp->next;
18715     sdp_tmp->next = NULL;
18716     if (AsnIoMemComp (sdp, sdp_tmp, (AsnWriteFunc) SeqDescrAsnWrite)) {
18717       found_match = TRUE;
18718     }
18719     sdp_tmp->next = sdp_tmp_next;
18720   }
18721   return found_match;
18722 }
18723 
18724 
18725 //Not part of Autodef or Cleanup
AddNewUniqueDescriptors(SeqDescrPtr PNTR new_set,SeqDescrPtr parent_set)18726 NLM_EXTERN void AddNewUniqueDescriptors (SeqDescrPtr PNTR new_set, SeqDescrPtr parent_set)
18727 {
18728   SeqDescrPtr sdp, sdp_next;
18729   Boolean     found_match;
18730 
18731   if (new_set == NULL || parent_set == NULL) return;
18732 
18733   if (*new_set == NULL) {
18734     ValNodeLink (new_set,
18735                  AsnIoMemCopy ((Pointer) parent_set,
18736                                (AsnReadFunc) SeqDescrAsnRead,
18737                                (AsnWriteFunc) SeqDescrAsnWrite));
18738   } else {
18739     sdp = parent_set;
18740     while (sdp != NULL) {
18741       sdp_next = sdp->next;
18742       sdp->next = NULL;
18743       found_match = IsDescriptorInList(sdp, *new_set);
18744       if (!found_match) {
18745         ValNodeLink (new_set,
18746                      AsnIoMemCopy ((Pointer) sdp,
18747                                    (AsnReadFunc) SeqDescrAsnRead,
18748                                    (AsnWriteFunc) SeqDescrAsnWrite));
18749       }
18750       sdp->next = sdp_next;
18751       sdp = sdp->next;
18752     }
18753   }
18754 }
18755 
18756 
18757 //Not part of Autodef or Cleanup
/* Runs AddNewUniqueDescriptors against the descriptor chain of whatever
 * sep holds (a Bioseq or a Bioseq-set).  Entries with a NULL payload or
 * of any other kind are left alone.  sep itself must not be NULL.
 */
static void AddNewUniqueDescriptorsToSeqEntry (SeqEntryPtr sep, SeqDescrPtr parent_set)
{
  SeqDescrPtr  *target = NULL;
  BioseqPtr    seq;
  BioseqSetPtr set;

  if (IS_Bioseq(sep)) {
    seq = (BioseqPtr) sep->data.ptrvalue;
    if (seq != NULL) {
      target = &(seq->descr);
    }
  } else if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set != NULL) {
      target = &(set->descr);
    }
  }

  if (target != NULL) {
    AddNewUniqueDescriptors (target, parent_set);
  }
}
18775 
18776 
18777 //Not part of Autodef or Cleanup
AddNewUniqueAnnotations(SeqAnnotPtr PNTR new_set,SeqAnnotPtr parent_set)18778 NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr parent_set)
18779 {
18780   SeqAnnotPtr sap, sap_next, sap_tmp, sap_tmp_next, sap_copy, last_sap;
18781   Boolean     found_match;
18782 
18783   if (new_set == NULL || parent_set == NULL) return;
18784 
18785   sap = parent_set;
18786   while (sap != NULL) {
18787     sap_next = sap->next;
18788     sap->next = NULL;
18789     last_sap = NULL;
18790     for (sap_tmp = *new_set, found_match = FALSE;
18791          sap_tmp != NULL && !found_match;
18792          sap_tmp = sap_tmp->next) {
18793       sap_tmp_next = sap_tmp->next;
18794       sap_tmp->next = NULL;
18795       if (AsnIoMemComp (sap, sap_tmp, (AsnWriteFunc) SeqAnnotAsnWrite)) {
18796         found_match = TRUE;
18797       }
18798       sap_tmp->next = sap_tmp->next;
18799       last_sap = sap_tmp;
18800     }
18801     if (!found_match) {
18802       sap_copy = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
18803       if (last_sap == NULL) {
18804         *new_set = sap_copy;
18805       } else {
18806         last_sap->next = sap_copy;
18807       }
18808     }
18809     sap->next = sap_next;
18810     sap = sap->next;
18811   }
18812 }
18813 
18814 
18815 //Not part of Autodef or Cleanup
/* Moves the seq-entries behind a list of items into newset.
 *
 * For every item the entry returned by GetBestSeqEntryForItem is unlinked
 * from its current parent set (if its parent is a Bioseq-set) and appended
 * to the end of newset->seq_set.  A parent that becomes empty is marked
 * for deletion.
 *
 * for_segregate TRUE:  descriptors and annotations of the old parent are
 *                      merged into newset, and a newset of class "genbank"
 *                      takes over the old parent's class.
 * for_segregate FALSE: descriptors of the old parent are merged into the
 *                      moved sequence or set itself.
 *
 * Several items can resolve to the same entry.  An entry whose parent is
 * already newset is skipped: unlinking and re-appending it would tie the
 * entry to itself when it is the current tail of the list.
 */
static void AddItemListToSet (ValNodePtr item_list, BioseqSetPtr newset, Boolean for_segregate)
{
  ValNodePtr vnp_item;
  SeqEntryPtr sep, last_sep, prev_sep, remove_sep;
  BioseqSetPtr bssp, orig_parent;
  BioseqPtr bsp;

  if (newset == NULL || item_list == NULL) return;

  /* find the current tail of the new set */
  last_sep = newset->seq_set;
  while (last_sep != NULL && last_sep->next != NULL) {
    last_sep = last_sep->next;
  }

  for (vnp_item = item_list; vnp_item != NULL; vnp_item = vnp_item->next) {
    sep = GetBestSeqEntryForItem (vnp_item);
    if (sep == NULL || sep->data.ptrvalue == NULL) continue;
    orig_parent = NULL;
    bsp = NULL;
    bssp = NULL;
    if (IS_Bioseq (sep)) {
      bsp = sep->data.ptrvalue;
      if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
        orig_parent = bsp->idx.parentptr;
      }
    } else if (IS_Bioseq_set (sep)) {
      bssp = sep->data.ptrvalue;
      if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
        orig_parent = bssp->idx.parentptr;
      }
    } else {
      continue;
    }

    if (orig_parent == newset) {
      /* already moved by an earlier item */
      continue;
    }

    if (orig_parent != NULL) {
      /* detach from the old parent; SeqMgrLinkSeqEntry below sets the new one */
      if (bsp != NULL) {
        bsp->idx.parentptr = NULL;
      } else {
        bssp->idx.parentptr = NULL;
      }

      /* remove this seq-entry from the original parent */
      prev_sep = NULL;
      for (remove_sep = orig_parent->seq_set;
           remove_sep != NULL && remove_sep != sep;
           remove_sep = remove_sep->next) {
        prev_sep = remove_sep;
      }
      if (remove_sep == sep) {
        if (prev_sep == NULL) {
          orig_parent->seq_set = orig_parent->seq_set->next;
          if (orig_parent->seq_set == NULL) {
            orig_parent->idx.deleteme = TRUE;
          }
        } else {
          prev_sep->next = sep->next;
        }
      }
      /* set class type if not already set */
      if (newset->_class == BioseqseqSet_class_genbank && for_segregate) {
        newset->_class = orig_parent->_class;
      }

      if (for_segregate) {
        /* add descriptors from the orig_parent to the new parent */
        AddNewUniqueDescriptors (&(newset->descr), orig_parent->descr);

        /* add annotations from the orig_parent to the new parent */
        AddNewUniqueAnnotations (&(newset->annot), orig_parent->annot);
      } else {
        /* add descriptors from the orig_parent to the bioseq itself (or nuc-prot-set if that's what moved) */
        if (bsp != NULL) {
          AddNewUniqueDescriptors (&(bsp->descr), orig_parent->descr);
        } else if (bssp != NULL) {
          AddNewUniqueDescriptors (&(bssp->descr), orig_parent->descr);
        }
      }
    }

    /* add to new parent */
    sep->next = NULL;
    if (last_sep == NULL) {
      newset->seq_set = sep;
    } else {
      last_sep->next = sep;
    }
    last_sep = sep;
    SeqMgrLinkSeqEntry (sep, OBJ_BIOSEQSET, newset);
  }

}
18904 
18905 
18906 //Not part of AutoDef or Cleanup
/* Moves the entries of a category into newset.
 *
 * A chosen category contributes its own item list (in segregate mode).
 * A category that is not chosen is descended into, so that chosen
 * subcategories are picked up instead.  Categories without an item list
 * are ignored altogether, including their subcategories.
 */
static void AddCategorySeqEntriesToSet (BioseqSetPtr newset, ClickableItemPtr category)
{
  ValNodePtr sub;

  if (newset == NULL || category == NULL || category->item_list == NULL) return;

  if (!category->chosen) {
    sub = category->subcategories;
    while (sub != NULL) {
      AddCategorySeqEntriesToSet (newset, sub->data.ptrvalue);
      sub = sub->next;
    }
    return;
  }

  AddItemListToSet (category->item_list, newset, TRUE);
}
18921 
18922 //Not part of AutoDef or Cleanup
NeedsNewSet(SeqEntryPtr sep)18923 static Boolean NeedsNewSet (SeqEntryPtr sep)
18924 {
18925   BioseqSetPtr bssp;
18926   while (sep != NULL) {
18927     if (IS_Bioseq (sep)) {
18928       return TRUE;
18929     } else if (IS_Bioseq_set (sep)) {
18930       bssp = (BioseqSetPtr) sep->data.ptrvalue;
18931       if (bssp != NULL
18932           && (bssp->_class == BioseqseqSet_class_nuc_prot
18933           || bssp->_class == BioseqseqSet_class_segset)) {
18934         return TRUE;
18935       }
18936     }
18937     sep = sep->next;
18938   }
18939   return FALSE;
18940 }
18941 
18942 
18943 //Not part of AutoDef or Cleanup
IsSingletonSet(SeqEntryPtr sep)18944 static Boolean IsSingletonSet (SeqEntryPtr sep)
18945 {
18946   BioseqSetPtr bssp;
18947   SeqAnnotPtr sap;
18948 
18949   if (sep == NULL
18950       || !IS_Bioseq_set(sep)
18951       || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL
18952       || bssp->seq_set == NULL
18953       || bssp->seq_set->next != NULL) {
18954     return FALSE;
18955   }
18956 
18957   /* not a singleton set if it has an alignment annotation */
18958   for (sap = bssp->annot; sap != NULL; sap = sap->next) {
18959     if (sap->type == 2) {
18960       return FALSE;
18961     }
18962   }
18963   return TRUE;
18964 }
18965 
18966 
18967 //Not part of AutoDef or Cleanup
/* Hooks the annotation chain sap onto the end of the annotation list of
 * the Bioseq or Bioseq-set held by sep.  The chain is linked in as is, not
 * copied.  A NULL sep, or an entry that is neither kind, is ignored.
 */
static void AddAnnotsToSeqEntry (SeqEntryPtr sep, SeqAnnotPtr sap)
{
  SeqAnnotPtr  *link;
  BioseqPtr    seq;
  BioseqSetPtr set;

  if (sep == NULL) {
    return;
  }

  if (IS_Bioseq(sep)) {
    seq = (BioseqPtr) sep->data.ptrvalue;
    link = &(seq->annot);
  } else if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    link = &(set->annot);
  } else {
    return;
  }

  /* advance to the terminating NULL link and store the chain there */
  while (*link != NULL) {
    link = &((*link)->next);
  }
  *link = sap;
}
19001 
19002 //Not part of AutoDef or Cleanup
/* Replaces every singleton member set (see IsSingletonSet) of the set held
 * by sep with that set's single member.
 *
 * For each such wrapper set: its title descriptors are discarded, the
 * remaining descriptors are pushed down with SetDescriptorPropagate, its
 * annotations are handed to the member, the member takes the wrapper's
 * position in the list, and the emptied wrapper is freed.
 *
 * The object manager data of sep is saved before and restored after the
 * restructuring, and sep is re-linked to its original parent.
 */
static void PromoteSingletonSetsInSet (SeqEntryPtr sep)
{
  ObjMgrDataPtr     omdptop;
  ObjMgrData        omdata;
  BioseqSetPtr      bssp, child_bssp;
  SeqEntryPtr       sep_child, child_next, child_prev = NULL;
  ValNodePtr        titles;
  Uint2             top_parenttype;
  Pointer           top_parentptr;

  if (sep == NULL || !IS_Bioseq_set (sep)
      || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
    return;
  }

  SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
  GetSeqEntryParent (sep, &top_parentptr, &top_parenttype);

  for (sep_child = bssp->seq_set; sep_child != NULL; sep_child = child_next) {
    /* remember the successor now: sep_child may be freed below */
    child_next = sep_child->next;
    if (IsSingletonSet(sep_child)) {
      child_bssp = (BioseqSetPtr) sep_child->data.ptrvalue;
      /* remove set title if any */
      titles = ValNodeExtractList (&(child_bssp->descr), Seq_descr_title);
      titles = SeqDescrFree (titles);
      /* propagate remaining descriptors */
      SetDescriptorPropagate (child_bssp);
      /* push down annotation */
      AddAnnotsToSeqEntry (child_bssp->seq_set, child_bssp->annot);
      /* The chain now belongs to the member.  Drop the wrapper's reference
       * so that freeing the wrapper below does not free it as well. */
      child_bssp->annot = NULL;
      /* replace in list */
      if (child_prev == NULL) {
        bssp->seq_set = child_bssp->seq_set;
      } else {
        child_prev->next = child_bssp->seq_set;
      }
      child_bssp->seq_set->next = child_next;
      child_prev = child_bssp->seq_set;
      /* detach the member before freeing the wrapper */
      child_bssp->seq_set = NULL;
      sep_child = SeqEntryFree (sep_child);
    } else {
      child_prev = sep_child;
    }
  }

  SeqMgrLinkSeqEntry (sep, top_parenttype, top_parentptr);
  RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
}
19049 
19050 //Not part of AutoDef or Cleanup
MakeGroupsForUniqueValues(BioseqSetPtr bssp,ValNodePtr value_lists)19051 NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues
19052 (BioseqSetPtr bssp,
19053  ValNodePtr   value_lists)
19054 {
19055   ObjMgrDataPtr     omdptop;
19056   ObjMgrData        omdata;
19057   Uint2             parenttype;
19058   Pointer           parentptr;
19059   BioseqSetPtr parent_set;
19060   SeqEntryPtr  sep, first_new_sep = NULL;
19061   SeqEntryPtr  tmp;
19062   BioseqSetPtr newset;
19063   ValNodePtr   vnp;
19064   ClickableItemPtr cip;
19065   Uint1        child_class;
19066   Uint2        entityID;
19067   Boolean      child_became_parent = FALSE;
19068 
19069   if (bssp == NULL) return NULL;
19070   entityID = bssp->idx.entityID;
19071 
19072   sep = SeqMgrGetSeqEntryForData (bssp);
19073 
19074   SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
19075   GetSeqEntryParent (sep, &parentptr, &parenttype);
19076 
19077   parent_set = (BioseqSetPtr)(bssp->idx.parentptr);
19078 
19079   if (parent_set == NULL || parent_set->seq_set == NULL) {
19080     /* this set has no parent, so make it the parent set, class GenBank,
19081      * and create two new sets using the original set class as members of this set
19082      */
19083     parent_set = bssp;
19084     child_class = parent_set->_class;
19085     child_became_parent = TRUE;
19086   } else {
19087     /* we already have a parent set. */
19088     child_class = bssp->_class;
19089   }
19090 
19091   for (vnp = value_lists; vnp != NULL; vnp = vnp->next) {
19092     cip = (ClickableItemPtr) vnp->data.ptrvalue;
19093     if (cip == NULL || (!cip->chosen && ! AnyDiscrepanciesChosen (cip->subcategories))) {
19094       continue;
19095     }
19096 
19097     newset = BioseqSetNew ();
19098     newset->_class = child_class;
19099     /* add SeqEntries for this category here */
19100     AddCategorySeqEntriesToSet (newset, cip);
19101 
19102     /* check - is any member of the newset also a set? */
19103     tmp = newset->seq_set;
19104     while (tmp != NULL && IS_Bioseq (tmp)) {
19105       tmp = tmp->next;
19106     }
19107 
19108     /* add to members for parent_set */
19109     tmp = SeqEntryNew ();
19110     tmp->choice = 2;
19111     tmp->data.ptrvalue = (Pointer) newset;
19112 
19113     tmp->next = parent_set->seq_set;
19114     parent_set->seq_set = tmp;
19115 
19116     if (parent_set->idx.deleteme) {
19117       parent_set->idx.deleteme = FALSE;
19118     }
19119     if (first_new_sep == NULL) {
19120       first_new_sep = tmp;
19121     }
19122 
19123     SeqMgrLinkSeqEntry (tmp, OBJ_BIOSEQSET, parent_set);
19124   }
19125 
19126   RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
19127   DeleteMarkedObjects (entityID, 0, NULL);
19128 
19129   if (child_became_parent) {
19130     if (first_new_sep != NULL && NeedsNewSet (first_new_sep->next)) {
19131       /* make new set to hold what wasn't moved */
19132       newset = BioseqSetNew ();
19133       /* contents of set are unmoved sequences */
19134       newset->seq_set = first_new_sep->next;
19135       /* set class type */
19136       newset->_class = parent_set->_class;
19137       /* add descriptors from the parent */
19138       ValNodeLink (&(newset->descr),
19139                    AsnIoMemCopy ((Pointer) parent_set->descr,
19140                                  (AsnReadFunc) SeqDescrAsnRead,
19141                                  (AsnWriteFunc) SeqDescrAsnWrite));
19142 
19143 
19144       tmp = SeqEntryNew();
19145       tmp->choice = 2;
19146       tmp->data.ptrvalue = (Pointer) newset;
19147       tmp->next = NULL;
19148       first_new_sep->next = tmp;
19149       SeqMgrLinkSeqEntry (tmp, OBJ_BIOSEQSET, parent_set);
19150       /* need to update GatherIndex values */
19151       AssignIDsInEntity (entityID, 0, NULL);
19152     } else if (first_new_sep != NULL) {
19153       sep = first_new_sep->next;
19154       while (sep != NULL) {
19155         AddNewUniqueDescriptorsToSeqEntry (sep, parent_set->descr);
19156         sep = sep->next;
19157       }
19158     }
19159     /* set parent class to GenBank set */
19160     parent_set->_class = BioseqseqSet_class_genbank;
19161     /* remove descriptors on parent, they will all have been copied down */
19162     parent_set->descr = SeqDescrFree (parent_set->descr);
19163   }
19164 
19165   sep = SeqMgrGetSeqEntryForData (parent_set);
19166   PromoteSingletonSetsInSet (sep);
19167 
19168   ObjMgrSetDirtyFlag (entityID, TRUE);
19169   ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
19170 
19171   return parent_set;
19172 }
19173 
19174 //Not part of Autodef or Cleanup
/* Annotation callback that removes a sequence from the alignments stored
 * in sap.
 *
 * sap:  annotation to process; anything but type 2 (alignment) is ignored.
 * data: the BioseqPtr of the sequence to remove.
 *
 * Each alignment in the chain is checked for any of the sequence's IDs.
 * An alignment containing it is dropped completely when it is pairwise
 * (dim == 2) or the chain is of segtype 1; otherwise the sequence is
 * deleted from the alignment via SeqAlignBioseqDeleteById.  If no
 * alignment is left the annotation is marked for deletion.
 */
static void RemoveBioseqFromAlignmentsCallback (SeqAnnotPtr sap, Pointer data)
{
  BioseqPtr   bsp;
  SeqAlignPtr salphead, salp, salp_next, prev_salp, remove_salp, last_remove;
  SeqIdPtr    sip, tmpsip, match_sip;
  Uint4       seqid_order;

  if (sap == NULL || sap->type != 2
      || (bsp = (BioseqPtr) data) == NULL
      || (salphead = (SeqAlignPtr) sap->data) == NULL) {
    return;
  }

  salp = salphead;
  prev_salp = NULL;
  remove_salp = NULL;
  last_remove = NULL;
  while (salp != NULL)
  {
    salp_next = salp->next;
    tmpsip = SeqIdPtrFromSeqAlign (salp);
    seqid_order = 0;
    match_sip = NULL;
    for (sip = bsp->id; sip != NULL && seqid_order == 0; sip = sip->next) {
      seqid_order = SeqIdOrderInBioseqIdList(sip, tmpsip);
      if (seqid_order != 0) {
        /* remember the ID that matched: the loop increment moves sip on to
         * the next ID before the loop condition ends the search */
        match_sip = sip;
      }
    }
    if (seqid_order == 0)
    {
      /* do nothing for this subalignment */
      prev_salp = salp;
    }
    else if (salp->dim == 2 || salphead->segtype ==1)
    {
      /* This is for a pairwise alignment or a DENDIAG alignment */
      if (prev_salp == NULL)
      {
          salphead = salp->next;
      }
      else
      {
          prev_salp->next = salp->next;
      }
      /* save the alignments that we want to free in a list and get rid of them
       * at the end - freeing them beforehand causes problems with listing the
       * IDs in the alignment.
       */
      salp->next = NULL;
      if (remove_salp == NULL)
      {
          remove_salp = salp;
      }
      else
      {
          last_remove->next = salp;
      }
      last_remove = salp;
    }
    else
    {
      SeqAlignBioseqDeleteById (salphead, match_sip);
      prev_salp = salp;
    }
    salp = salp_next;
  }
  /* Now we can free the alignment */
  SeqAlignFree (remove_salp);

  sap->data = salphead;
  if (sap->data == NULL) {
    sap->idx.deleteme = TRUE;
  }
}
19246 
19247 //Not part of Autodef or Cleanup
19248 /* expect that list is a valnode list with choice OBJ_BIOSEQ and data.ptrvalue a bioseq */
MoveSequencesFromSetToWrapper(ValNodePtr list,Uint2 entityID)19249 NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID)
19250 {
19251   ObjMgrDataPtr     omdptop;
19252   ObjMgrData        omdata;
19253   Uint2             parenttype;
19254   Pointer           parentptr;
19255   BioseqSetPtr      bssp;
19256   SeqEntryPtr       sep;
19257   ValNodePtr        vnp;
19258 
19259   if (list == NULL) return;
19260   sep = GetTopSeqEntryForEntityID (entityID);
19261   if (sep == NULL
19262       || !IS_Bioseq_set (sep)
19263       || (bssp = sep->data.ptrvalue) == NULL
19264       || bssp->_class != BioseqseqSet_class_genbank) {
19265     return;
19266   }
19267 
19268   /* first, propagate descriptors */
19269   SetDescriptorPropagate (bssp);
19270 
19271   /* pull sequences out of current positions and add to top-level set */
19272   SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
19273   GetSeqEntryParent (sep, &parentptr, &parenttype);
19274 
19275   AddItemListToSet (list, bssp, FALSE);
19276 
19277   RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
19278 
19279   /* remove sequences from alignments */
19280   for (vnp = list; vnp != NULL; vnp = vnp->next)
19281   {
19282     VisitAnnotsInSep (sep, vnp->data.ptrvalue, RemoveBioseqFromAlignmentsCallback);
19283   }
19284   DeleteMarkedObjects (entityID, 0, NULL);
19285 
19286   ObjMgrSetDirtyFlag (entityID, TRUE);
19287   ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
19288 
19289 }
19290 
19291 //Not part of Autodef or cleanup
GetBioseqListCallback(BioseqPtr bsp,Pointer userdata)19292 static void GetBioseqListCallback (BioseqPtr bsp, Pointer userdata)
19293 {
19294   if (bsp != NULL && userdata != NULL && ! ISA_aa (bsp->mol))
19295   {
19296     ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_BIOSEQ, bsp);
19297   }
19298 }
19299 
19300 
19301 //Not part of Autodef or cleanup
PrepareSequenceListForSegregateByNumberOfSets(Int4 num_sets,SeqEntryPtr sep)19302 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberOfSets (Int4 num_sets, SeqEntryPtr sep)
19303 {
19304   ValNodePtr cip_list = NULL;
19305   ValNodePtr         vnp;
19306   CharPtr            fmt = "set contains %d sequences";
19307   ValNodePtr         bsp_list = NULL, v_next;
19308   Int4               num_bioseqs, num_per_set, num_in_set;
19309 
19310   VisitBioseqsInSep (sep, &bsp_list, GetBioseqListCallback);
19311   num_bioseqs = ValNodeLen (bsp_list);
19312   num_per_set = num_bioseqs / num_sets;
19313   if (num_bioseqs % num_sets > 0) {
19314     num_per_set ++;
19315   }
19316 
19317   num_in_set = 0;
19318 
19319   for (vnp = bsp_list; vnp != NULL; vnp = v_next) {
19320     v_next = vnp->next;
19321     num_in_set ++;
19322     if (num_in_set == num_per_set) {
19323       vnp->next = NULL;
19324       ValNodeAddPointer (&cip_list, 0, NewClickableItem (0, fmt, bsp_list));
19325       bsp_list = v_next;
19326       num_in_set = 0;
19327     }
19328   }
19329   if (bsp_list != NULL) {
19330     ValNodeAddPointer (&cip_list, 0, NewClickableItem (0, fmt, bsp_list));
19331     bsp_list = NULL;
19332   }
19333   return cip_list;
19334 }
19335 
19336 
19337 //Not part of Autodef or cleanup
PrepareSequenceListForSegregateByNumberPerSet(Int4 num_per_set,SeqEntryPtr sep)19338 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_per_set, SeqEntryPtr sep)
19339 {
19340   ValNodePtr cip_list = NULL;
19341   ValNodePtr         vnp;
19342   CharPtr            fmt = "set contains %d sequences";
19343   ValNodePtr         bsp_list = NULL, v_next;
19344   Int4               num_in_set;
19345 
19346   VisitBioseqsInSep (sep, &bsp_list, GetBioseqListCallback);
19347 
19348   num_in_set = 0;
19349 
19350   for (vnp = bsp_list; vnp != NULL; vnp = v_next) {
19351     v_next = vnp->next;
19352     num_in_set ++;
19353     if (num_in_set == num_per_set) {
19354       vnp->next = NULL;
19355       ValNodeAddPointer (&cip_list, 0, NewClickableItem (0, fmt, bsp_list));
19356       bsp_list = v_next;
19357       num_in_set = 0;
19358     }
19359   }
19360   if (bsp_list != NULL) {
19361     ValNodeAddPointer (&cip_list, 0, NewClickableItem (0, fmt, bsp_list));
19362     bsp_list = NULL;
19363   }
19364   return cip_list;
19365 }
19366 
19367 
19368 //Not part of Autodef or cleanup
SegregateSetsByNumber(SeqEntryPtr sep,Int4 num_sets)19369 NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets)
19370 {
19371   ValNodePtr set_list;
19372   BioseqSetPtr bssp;
19373 
19374   if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
19375     return;
19376   }
19377 
19378   set_list = PrepareSequenceListForSegregateByNumberOfSets (num_sets, sep);
19379   ChooseAllDiscrepancies(set_list);
19380 
19381   MakeGroupsForUniqueValues (bssp, set_list);
19382 
19383   FreeClickableList (set_list);
19384 
19385 }
19386 
19387 
19388 //Not part of Autodef or cleanup
SegregateSetsByNumberPerSet(SeqEntryPtr sep,Int4 num_per_set)19389 NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set)
19390 {
19391   ValNodePtr set_list;
19392   BioseqSetPtr bssp;
19393 
19394   if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
19395     return;
19396   }
19397 
19398   set_list = PrepareSequenceListForSegregateByNumberPerSet (num_per_set, sep);
19399   ChooseAllDiscrepancies(set_list);
19400 
19401   MakeGroupsForUniqueValues (bssp, set_list);
19402 
19403   FreeClickableList (set_list);
19404 
19405 }
19406 
19407 
/* NULL-terminated list of plant group names.  MakePlantGroupList passes it
 * to MakeLineageGroupList, which creates one group per name and assigns a
 * Bioseq to the first group whose name occurs in its lineage string.
 */
static CharPtr s_PlantGroupList[] = {
  "Anthocerotophyta",
  "Bryophyta",
  "Charophyceae",
  "Chlorokybophyceae",
  "Chlorophyceae",
  "Coleochaetophyceae",
  "Coniferophyta",
  "Cycadophyta",
  "Ginkgophyta",
  "Gnetophyta",
  "Klebsormidiophyceae",
  "Lycopodiophyta",
  "Magnoliophyta",
  "Mamiellophyceae",
  "Marchantiophyta",
  "Mesostigmatophyceae",
  "Moniliformopses",
  "Pedinophyceae",
  "Picocystophyceae",
  "Prasinophyceae",
  "Trebouxiophyceae",
  "Ulvophyceae",
  "Zygnemophyceae",
  NULL
};
19434 
19435 
19436 //Not used by Autodef or cleanup
AssignBioseqToLineageGroup(BioseqPtr bsp,Pointer data)19437 static void AssignBioseqToLineageGroup (BioseqPtr bsp, Pointer data)
19438 {
19439   SeqDescPtr sdp;
19440   SeqMgrDescContext context;
19441   BioSourcePtr biop;
19442   CharPtr      lineage = NULL;
19443   ValNodePtr   list, vnp, vnp_last = NULL;
19444   ClickableItemPtr cip;
19445 
19446   if (bsp == NULL || ISA_aa (bsp->mol) || (list = (ValNodePtr) data) == NULL) {
19447     return;
19448   }
19449 
19450   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
19451   if (sdp != NULL
19452       && (biop = (BioSourcePtr) sdp->data.ptrvalue) != NULL
19453       && biop->org != NULL
19454       && biop->org->orgname != NULL) {
19455     lineage = biop->org->orgname->lineage;
19456   }
19457 
19458   for (vnp = list; vnp != NULL; vnp = vnp->next) {
19459     vnp_last = vnp;
19460     cip = (ClickableItemPtr) vnp->data.ptrvalue;
19461     if (vnp->next == NULL
19462         || StringISearch (lineage, cip->description) != NULL) {
19463       ValNodeAddPointer (&(cip->item_list), OBJ_BIOSEQ, bsp);
19464       break;
19465     }
19466   }
19467 }
19468 
19469 
19470 //Not used by Autodef or cleanup
MakeLineageGroupList(SeqEntryPtr sep,CharPtr PNTR lineage_strings)19471 static ValNodePtr MakeLineageGroupList (SeqEntryPtr sep, CharPtr PNTR lineage_strings)
19472 {
19473   ClickableItemPtr cip;
19474   Int4 i;
19475   ValNodePtr list = NULL, vnp, vnp_prev = NULL, vnp_next;
19476 
19477   for (i = 0; lineage_strings[i] != NULL; i++) {
19478     cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
19479     cip->description = StringSave(lineage_strings[i]);
19480     ValNodeAddPointer (&list, 0, cip);
19481   }
19482   cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
19483   cip->description = StringSave("Other");
19484   ValNodeAddPointer (&list, 0, cip);
19485 
19486 
19487   VisitBioseqsInSep (sep, list, AssignBioseqToLineageGroup);
19488 
19489   for (vnp = list; vnp != NULL; vnp = vnp_next) {
19490     vnp_next = vnp->next;
19491     cip = (ClickableItemPtr) vnp->data.ptrvalue;
19492     if (cip->item_list == NULL) {
19493       if (vnp_prev == NULL) {
19494         list = vnp_next;
19495       } else {
19496         vnp_prev->next = vnp_next;
19497       }
19498       vnp->next = NULL;
19499       vnp->data.ptrvalue = ClickableItemFree (vnp->data.ptrvalue);
19500       vnp = ValNodeFree (vnp);
19501     } else {
19502       vnp_prev = vnp;
19503     }
19504   }
19505   return list;
19506 }
19507 
19508 
19509 //Not used by Autodef or cleanup
MakePlantGroupList(SeqEntryPtr sep)19510 static ValNodePtr MakePlantGroupList (SeqEntryPtr sep)
19511 {
19512   return MakeLineageGroupList (sep, s_PlantGroupList);
19513 }
19514 
19515 
SegregateSetsByPlantGroup(SeqEntryPtr sep)19516 NLM_EXTERN void SegregateSetsByPlantGroup (SeqEntryPtr sep)
19517 {
19518   ValNodePtr set_list;
19519   BioseqSetPtr bssp;
19520 
19521   if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
19522     return;
19523   }
19524 
19525   set_list = MakePlantGroupList(sep);
19526   ChooseAllDiscrepancies(set_list);
19527 
19528   MakeGroupsForUniqueValues (bssp, set_list);
19529 
19530   FreeClickableList (set_list);
19531 }
19532 
19533 
/* NULL-terminated list of fungal group names.  SegregateSetsByFungusGroup
 * passes it to MakeLineageGroupList, which creates one group per name and
 * assigns a Bioseq to the first group whose name occurs in its lineage
 * string, so the order of entries decides ties.
 */
static CharPtr s_FungusGroupList[] = {
  "Arthoniomycetes",
  "Dothideomycetes",
  "Eurotiomycetes",
  "Geoglossomycetes",
  "Laboulbeniomycetes",
  "Lecanoromycetes",
  "Leotiomycetes",
  "Lichinomycetes",
  "Orbiliomycetes",
  "Pezizomycetes",
  "Sordariomycetes",
  "Saccharomycetes",
  "Neolectomycetes",
  "Pneumocystidomycetes",
  "Schizosaccharomycetes",
  "Taphrinomycetes",
  "Agaricomycetes",
  "Dacrymycetes",
  "Tremellomycetes",
  "Entorrhizomycetes",
  "Wallemiomycetes",
  "Agaricostilbomycetes",
  "Atractiellomycetes",
  "Exobasidiomycetes",
  "Ustilaginomycetes",
  "Blastocladiomycetes",
  "Chytridiomycetes",
  "Monoblepharidomycetes",
  "Glomeromycetes",
  "Neocallimastigomycetes",
  "Entomophthoromycotina",
  "Kickxellomycotina",
  "Mucoromycotina",
  "Zoopagomycotina",
  "Microsporidia",
  NULL
};
19572 
19573 //Not used by Autodef or cleanup
SegregateSetsByFungusGroup(SeqEntryPtr sep)19574 NLM_EXTERN void SegregateSetsByFungusGroup (SeqEntryPtr sep)
19575 {
19576   ValNodePtr set_list;
19577   BioseqSetPtr bssp;
19578 
19579   if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
19580     return;
19581   }
19582 
19583   set_list = MakeLineageGroupList(sep, s_FungusGroupList);
19584   ChooseAllDiscrepancies(set_list);
19585 
19586   MakeGroupsForUniqueValues (bssp, set_list);
19587 
19588   FreeClickableList (set_list);
19589 }
19590 
19591 
19592 //Not part of Autodef or Cleanup
PrepareSequenceListForSegregateByBioseqList(SeqEntryPtr sep,ValNodePtr bsp_list)19593 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByBioseqList (SeqEntryPtr sep, ValNodePtr bsp_list)
19594 {
19595   ValNodeBlock b_list;
19596   ValNodePtr   cip_list = NULL, vnp;
19597   ClickableItemPtr cip;
19598 
19599   InitValNodeBlock (&b_list, NULL);
19600   for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
19601     ValNodeAddPointerToEnd (&b_list, OBJ_BIOSEQ, vnp->data.ptrvalue);
19602   }
19603   if (b_list.head != NULL) {
19604     cip = (ClickableItemPtr) MemNew (sizeof (ClickableItemData));
19605     cip->item_list = b_list.head;
19606     ValNodeAddPointer (&cip_list, 0, cip);
19607   }
19608   return cip_list;
19609 }
19610 
19611 //Not part of Autodef or Cleanup
RemoveFoundBioseqFromAlignment(BioseqPtr bsp,Pointer data)19612 static void RemoveFoundBioseqFromAlignment (BioseqPtr bsp, Pointer data)
19613 {
19614   SeqAlignPtr PNTR pSalp;
19615   SeqIdPtr sip;
19616 
19617   if (bsp == NULL || (pSalp = (SeqAlignPtr PNTR) data) == NULL || *pSalp == NULL) {
19618     return;
19619   }
19620   for (sip = bsp->id; sip != NULL; sip = sip->next) {
19621     *pSalp = RemoveOneSequenceFromAlignment (sip, *pSalp);
19622   }
19623 }
19624 
19625 
19626 //Not part of Autodef or Cleanup
RemoveSequencesFromOtherSepFromAlignment(SeqEntryPtr exclude,SeqAnnotPtr sap)19627 static void RemoveSequencesFromOtherSepFromAlignment (SeqEntryPtr exclude, SeqAnnotPtr sap)
19628 {
19629   SeqAlignPtr salp;
19630   BioseqPtr   bsp;
19631   BioseqSetPtr bssp;
19632 
19633   if (exclude == NULL || sap == NULL || sap->type != 2 || (salp = (SeqAlignPtr)sap->data) == NULL) {
19634     return;
19635   }
19636   if (IS_Bioseq (exclude) && (bsp = (BioseqPtr) exclude->data.ptrvalue) != NULL) {
19637     RemoveFoundBioseqFromAlignment (bsp, &salp);
19638   } else if (IS_Bioseq_set (exclude) && (bssp = (BioseqSetPtr) exclude->data.ptrvalue) != NULL) {
19639     VisitBioseqsInSet (bssp, &salp, RemoveFoundBioseqFromAlignment);
19640   }
19641   sap->data = salp;
19642   if (sap->data == NULL) {
19643     sap->idx.deleteme = TRUE;
19644   }
19645 }
19646 
19647 
19648 //Not part of Autodef or Cleanup
CopyAnnotToOtherSeq(SeqAnnotPtr sap,SeqEntryPtr list,SeqEntryPtr not_this_one)19649 static void CopyAnnotToOtherSeq (SeqAnnotPtr sap, SeqEntryPtr list, SeqEntryPtr not_this_one)
19650 {
19651   SeqEntryPtr sep;
19652   BioseqSetPtr bssp;
19653   BioseqPtr    bsp;
19654   SeqAnnotPtr  copy;
19655 
19656   if (sap == NULL) {
19657     return;
19658   }
19659   for (sep = list; sep != NULL; sep = sep->next) {
19660     if (sep != not_this_one) {
19661       if (IS_Bioseq (sep) && (bsp = (BioseqPtr) sep->data.ptrvalue) != NULL) {
19662         copy = (SeqAnnotPtr) AsnIoMemCopy (sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
19663         RemoveSequencesFromOtherSepFromAlignment (not_this_one, copy);
19664         copy->next = bsp->annot;
19665         bsp->annot = copy;
19666       } else if (IS_Bioseq_set (sep) && (bssp = (BioseqSetPtr) sep->data.ptrvalue) != NULL) {
19667         copy = (SeqAnnotPtr) AsnIoMemCopy (sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
19668         RemoveSequencesFromOtherSepFromAlignment (not_this_one, copy);
19669         copy->next = bssp->annot;
19670         bssp->annot = copy;
19671       }
19672     }
19673   }
19674 }
19675 
19676 
19677 //Not part of Autodef or Cleanup
FixOriginalCopiedAlignment(SeqAnnotPtr sap,SeqEntryPtr list,SeqEntryPtr not_this_one)19678 static void FixOriginalCopiedAlignment (SeqAnnotPtr sap, SeqEntryPtr list, SeqEntryPtr not_this_one)
19679 {
19680   SeqEntryPtr sep;
19681 
19682   for (sep = list; sep != NULL; sep = sep->next) {
19683     if (sep != not_this_one) {
19684       RemoveSequencesFromOtherSepFromAlignment (sep, sap);
19685     }
19686   }
19687 }
19688 
19689 
19690 //Not part of Autodef or Cleanup
CopyBioseqListAlignments(BioseqSetPtr parent)19691 static void CopyBioseqListAlignments (BioseqSetPtr parent)
19692 {
19693   SeqEntryPtr sep;
19694   SeqAnnotPtr sap;
19695   BioseqSetPtr bssp;
19696   BioseqPtr    bsp;
19697 
19698   for (sep = parent->seq_set; sep != NULL; sep = sep->next) {
19699     sap = NULL;
19700     if (IS_Bioseq (sep) && (bsp = (BioseqPtr) sep->data.ptrvalue) != NULL) {
19701       sap = bsp->annot;
19702     } else if (IS_Bioseq_set (sep) && (bssp = (BioseqSetPtr) sep->data.ptrvalue) != NULL) {
19703       sap = bssp->annot;
19704     }
19705     while (sap != NULL) {
19706       if (sap->type == 2 && sap->data != NULL) {
19707         CopyAnnotToOtherSeq (sap, parent->seq_set, sep);
19708         FixOriginalCopiedAlignment (sap, parent->seq_set, sep);
19709       }
19710       sap = sap->next;
19711     }
19712   }
19713 }
19714 
19715 
19716 //Not part of Autodef or Cleanup
SegregateSetsByBioseqList(SeqEntryPtr sep,ValNodePtr vnp)19717 NLM_EXTERN void SegregateSetsByBioseqList (SeqEntryPtr sep, ValNodePtr vnp)
19718 {
19719   ValNodePtr set_list;
19720   BioseqSetPtr bssp;
19721 
19722   if (sep == NULL || !IS_Bioseq_set (sep) || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
19723     return;
19724   }
19725 
19726   set_list = PrepareSequenceListForSegregateByBioseqList (sep, vnp);
19727   ChooseAllDiscrepancies(set_list);
19728 
19729   MakeGroupsForUniqueValues (bssp, set_list);
19730   CopyBioseqListAlignments (bssp);
19731   DeleteMarkedObjects (bssp->idx.entityID, 0, NULL);
19732   FreeClickableList (set_list);
19733 
19734 }
19735 
19736 //Not used for Autodef or Cleanup
SeqAnnotIsPairwiseAlignment(SeqAnnotPtr sap,Pointer data)19737 static void SeqAnnotIsPairwiseAlignment (SeqAnnotPtr sap, Pointer data)
19738 {
19739   BoolPtr     is;
19740   SeqAlignPtr salphead, salp;
19741 
19742   if (sap == NULL || sap->type != 2
19743       || (salphead = (SeqAlignPtr) sap->data) == NULL
19744       || (is = (BoolPtr) data) == NULL || *is)
19745   {
19746     return;
19747   }
19748   for (salp = salphead; salp != NULL && !*is; salp = salp->next)
19749   {
19750     if (salp->dim == 2)
19751     {
19752       *is = TRUE;
19753     }
19754   }
19755 }
19756 
19757 //Not used for Autodef or Cleanup
SeqEntryHasPairwiseAlignments(SeqEntryPtr sep)19758 NLM_EXTERN Boolean SeqEntryHasPairwiseAlignments (SeqEntryPtr sep)
19759 {
19760   Boolean rval = FALSE;
19761 
19762   VisitAnnotsInSep (sep, &rval, SeqAnnotIsPairwiseAlignment);
19763   return rval;
19764 }
19765 
19766 
/* Token handler used by ParseLineOfTokens: receives the text of one
 * tab-separated column (NULL when the line ran out of columns) and the
 * caller's data pointer, and returns FALSE to abort parsing of the line.
 */
typedef  Boolean  (*Nlm_ParseProc) PROTO ((CharPtr, Pointer));
19768 
19769 //Not used for Autodef or Cleanup
SkipToken(CharPtr cp,Pointer data)19770 static Boolean SkipToken (CharPtr cp, Pointer data)
19771 {
19772   return TRUE;
19773 }
19774 
19775 //Not used for Autodef or Cleanup
ParseLineOfTokens(CharPtr line,Nlm_ParseProc PNTR token_funcs,Pointer data)19776 static Boolean ParseLineOfTokens (CharPtr line, Nlm_ParseProc PNTR token_funcs, Pointer data)
19777 {
19778   CharPtr cp, cp_next;
19779   Char    ch_was;
19780   Int4    token_num = 0;
19781   Boolean rval = TRUE;
19782 
19783   if (StringHasNoText (line) || token_funcs == NULL) {
19784     return FALSE;
19785   }
19786 
19787   cp = line;
19788   cp_next = StringChr (cp, '\t');
19789   while (cp_next != NULL && rval && token_funcs[token_num] != NULL) {
19790     ch_was = *cp_next;
19791     *cp_next = 0;
19792     rval = token_funcs[token_num] (cp, data);
19793     *cp_next = ch_was;
19794     cp = cp_next + 1;
19795     cp_next = StringChr (cp, '\t');
19796     token_num++;
19797   }
19798 
19799   if (rval && token_funcs[token_num] != NULL) {
19800     /* last token_func for end of line */
19801     rval = token_funcs[token_num](cp, data);
19802     token_num++;
19803     while (token_funcs[token_num] != NULL && rval) {
19804       rval = token_funcs[token_num](NULL, data);
19805       token_num++;
19806     }
19807   }
19808   return rval;
19809 }
19810 
19811 
/* Output from Fungal ITS sequence extractor (tab-separated columns):
 * first column is ID (after position in set) and length
 * next column is ITS1 or ---- (---- means it's not there)
 * next column is ITS2 or ---- (---- means it's not there)
 * (the token_parsers table skips two columns at this point)
 * next column is range for ITS1 (or -----)
 * next column is range for ITS2 (or -----)
 * next column (if present) indicates reverse complement.
 */

/* One parsed line of extractor output; filled in by the handlers listed in
 * token_parsers.
 */
typedef struct extractorinfo {
  CharPtr id;            /* first sequence ID from the first column (allocated copy) */
  Int4    length;        /* number that precedes " bp." in the first column */
  Boolean has_its1;      /* TRUE when the ITS1 column reads "ITS1" */
  Boolean has_its2;      /* TRUE when the ITS2 column reads "ITS2" */
  CharPtr its1_range;    /* text following "ITS1: " (allocated copy), else NULL */
  CharPtr its2_range;    /* text following "ITS2: " (allocated copy), else NULL */
  Boolean is_complement; /* TRUE when the last column starts with "Reverse complementary" */
} ExtractorInfoData, PNTR ExtractorInfoPtr;
19830 
19831 //Not used for Autodef or Cleanup
ExtractorInfoNew()19832 static ExtractorInfoPtr ExtractorInfoNew ()
19833 {
19834   ExtractorInfoPtr ep = (ExtractorInfoPtr) MemNew (sizeof (ExtractorInfoData));
19835   MemSet (ep, 0, sizeof (ExtractorInfoData));
19836   return ep;
19837 }
19838 
19839 //Not used for Autodef or Cleanup
ExtractorInfoFree(ExtractorInfoPtr ep)19840 static ExtractorInfoPtr ExtractorInfoFree (ExtractorInfoPtr ep)
19841 {
19842   if (ep != NULL) {
19843     ep->id = MemFree (ep->id);
19844     ep->its1_range = MemFree (ep->its1_range);
19845     ep->its2_range = MemFree (ep->its2_range);
19846     ep = MemFree (ep);
19847   }
19848   return ep;
19849 }
19850 
19851 //Not used for Autodef or Cleanup
ParseExtractorIdAndLength(CharPtr cp,Pointer data)19852 static Boolean ParseExtractorIdAndLength (CharPtr cp, Pointer data)
19853 {
19854   ExtractorInfoPtr ep;
19855   CharPtr div, id_start, id_end;
19856   Int4    len;
19857   Char    ch_was;
19858 
19859   if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
19860     return FALSE;
19861   }
19862   /* separate length and ID */
19863   len = StringLen (cp);
19864   if (len < 4) {
19865     return FALSE;
19866   }
19867   if (StringCmp (cp + len - 4, " bp.") != 0) {
19868     return FALSE;
19869   }
19870 
19871   div = cp + len - 5;
19872   while (div > cp && isdigit (*div)) {
19873     div--;
19874   }
19875   if (!isdigit (*(div + 1))) {
19876     return FALSE;
19877   }
19878   ep->length = atoi (div + 1);
19879 
19880   /* skip over the part that indicates the position of the sequence (1 of N, 2 of N, etc.) */
19881   id_start = StringChr (cp, ')');
19882   if (id_start == NULL) {
19883     return FALSE;
19884   }
19885   id_start++;
19886   while (isspace (*id_start)) {
19887     id_start++;
19888   }
19889 
19890   if (id_start >= div) {
19891     return FALSE;
19892   }
19893 
19894   /* if we have a list of IDs, truncate after just the first one */
19895   id_end = StringChr (id_start, '|');
19896   if (id_end != NULL && id_end < div) {
19897     id_end = StringChr (id_end + 1, '|');
19898     if (id_end != NULL && id_end < div) {
19899       div = id_end;
19900     }
19901   }
19902 
19903   ch_was = *div;
19904   *div = 0;
19905   ep->id = StringSave (id_start);
19906   *div = ch_was;
19907   /* trim spaces from end of ID */
19908   cp = ep->id + StringLen (ep->id) - 1;
19909   while (cp > ep->id && isspace (*cp)) {
19910     cp--;
19911   }
19912   *(cp + 1) = 0;
19913   return TRUE;
19914 }
19915 
19916 //Not used for Autodef or Cleanup
ParseHasITS1(CharPtr cp,Pointer data)19917 static Boolean ParseHasITS1 (CharPtr cp, Pointer data)
19918 {
19919   ExtractorInfoPtr ep;
19920 
19921   if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
19922     return FALSE;
19923   }
19924 
19925   if (StringCmp (cp, "ITS1") == 0) {
19926     ep->has_its1 = TRUE;
19927   } else if (StringCmp (cp, "----") == 0) {
19928     ep->has_its1 = FALSE;
19929   } else {
19930     return FALSE;
19931   }
19932 
19933   return TRUE;
19934 }
19935 
19936 //Not used for Autodef or Cleanup
ParseHasITS2(CharPtr cp,Pointer data)19937 static Boolean ParseHasITS2 (CharPtr cp, Pointer data)
19938 {
19939   ExtractorInfoPtr ep;
19940 
19941   if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
19942     return FALSE;
19943   }
19944 
19945   if (StringCmp (cp, "ITS2") == 0) {
19946     ep->has_its2 = TRUE;
19947   } else if (StringCmp (cp, "----") == 0) {
19948     ep->has_its2 = FALSE;
19949   } else {
19950     return FALSE;
19951   }
19952 
19953   return TRUE;
19954 }
19955 
19956 //Not used for Autodef or Cleanup
ParseITS1Range(CharPtr cp,Pointer data)19957 static Boolean ParseITS1Range (CharPtr cp, Pointer data)
19958 {
19959   ExtractorInfoPtr ep;
19960   Boolean          rval = TRUE;
19961 
19962   if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
19963     return FALSE;
19964   }
19965 
19966   if (*cp == '-') {
19967     if (ep->has_its1) {
19968       rval = FALSE;
19969     } else {
19970       rval = TRUE;
19971     }
19972   } else if (StringNCmp (cp, "ITS1: ", 6) == 0) {
19973     if (ep->has_its1) {
19974       ep->its1_range = StringSave (cp + 6);
19975     } else {
19976       rval = FALSE;
19977     }
19978   } else {
19979     rval = FALSE;
19980   }
19981   return rval;
19982 }
19983 
19984 //Not used for Autodef or Cleanup
ParseITS2Range(CharPtr cp,Pointer data)19985 static Boolean ParseITS2Range (CharPtr cp, Pointer data)
19986 {
19987   ExtractorInfoPtr ep;
19988   Boolean          rval = TRUE;
19989 
19990   if (StringHasNoText (cp) || (ep = (ExtractorInfoPtr) data) == NULL) {
19991     return FALSE;
19992   }
19993 
19994   if (*cp == '-') {
19995     if (ep->has_its2) {
19996       rval = FALSE;
19997     } else {
19998       rval = TRUE;
19999     }
20000   } else if (StringNCmp (cp, "ITS2: ", 6) == 0) {
20001     if (ep->has_its2) {
20002       ep->its2_range = StringSave (cp + 6);
20003     } else {
20004       rval = FALSE;
20005     }
20006   } else {
20007     rval = FALSE;
20008   }
20009   return rval;
20010 }
20011 
20012 //Not used for Autodef or Cleanup
ParseIsComplement(CharPtr cp,Pointer data)20013 static Boolean ParseIsComplement (CharPtr cp, Pointer data)
20014 {
20015   ExtractorInfoPtr ep;
20016   Boolean rval = TRUE;
20017 
20018   if ((ep = (ExtractorInfoPtr) data) == NULL) {
20019     return FALSE;
20020   }
20021   if (StringHasNoText (cp)) {
20022     ep->is_complement = FALSE;
20023   } else if (StringNCmp (cp, "Reverse complementary", 21) == 0) {
20024     ep->is_complement = TRUE;
20025   } else {
20026     rval = FALSE;
20027   }
20028   return rval;
20029 }
20030 
20031 
/* Column handlers for one line of extractor output, in column order and
 * NULL-terminated as ParseLineOfTokens requires.  The fourth and fifth
 * columns are accepted without being examined.
 */
static Nlm_ParseProc token_parsers[] = {
  ParseExtractorIdAndLength,
  ParseHasITS1,
  ParseHasITS2,
  SkipToken,
  SkipToken,
  ParseITS1Range,
  ParseITS2Range,
  ParseIsComplement,
  NULL};
20042 
20043 
/* Index of each feature that can appear in a label built by
 * MakeLabelFromExtractorInfo.  The values index extractor_feature_labels and
 * the has_feat array of RNAFeatListData, so the order must stay in step with
 * both.
 */
typedef enum {
  eExtractorFeat18S = 0,
  eExtractorFeatITS1,
  eExtractorFeat58S,
  eExtractorFeatITS2,
  eExtractorFeat28S
} EExtractorFeat;
20051 
/* Display names of the features, indexed by EExtractorFeat. */
CharPtr extractor_feature_labels[] = {
  "18S ribosomal RNA",
  "internal transcribed spacer 1",
  "5.8S ribosomal RNA",
  "internal transcribed spacer 2",
  "28S ribosomal RNA"
};
20059 
20060 
/* Per-sequence record of which rRNA/ITS features were reported.
 * NOTE(review): the code that fills has_feat, feat_pos and error lies outside
 * this part of the file; the field notes for those are assumptions to confirm.
 */
typedef struct rnafeatlist {
  CharPtr id;                              /* sequence ID (allocated copy), set by ParseExtractorIdOnly */
  Boolean has_feat[eExtractorFeat28S + 1]; /* one flag per EExtractorFeat value */
  Int4    feat_pos;                        /* presumably the next has_feat slot to fill while parsing - TODO confirm */
  Boolean is_complement;                   /* set by ParseNewComplement: "1" is TRUE, "0" or blank is FALSE */
  CharPtr error;                           /* freed by RNAFeatListFree; presumably an error message text - TODO confirm */
} RNAFeatListData, PNTR RNAFeatListPtr;
20068 
20069 //Not used for Autodef or Cleanup
RNAFeatListNew()20070 static RNAFeatListPtr RNAFeatListNew ()
20071 {
20072   RNAFeatListPtr ep = (RNAFeatListPtr) MemNew (sizeof (RNAFeatListData));
20073   MemSet (ep, 0, sizeof (RNAFeatListData));
20074   return ep;
20075 }
20076 
20077 //Not used for Autodef or Cleanup
RNAFeatListFree(RNAFeatListPtr ep)20078 static RNAFeatListPtr RNAFeatListFree (RNAFeatListPtr ep)
20079 {
20080   if (ep != NULL) {
20081     ep->id = MemFree (ep->id);
20082     ep->error = MemFree (ep->error);
20083     ep = MemFree (ep);
20084   }
20085   return ep;
20086 }
20087 
20088 //Not used for Autodef or Cleanup
MakeLabelFromExtractorInfo(ExtractorInfoPtr ep)20089 static CharPtr MakeLabelFromExtractorInfo (ExtractorInfoPtr ep)
20090 {
20091   Boolean feat_present[5];
20092   CharPtr cp, label;
20093   Int4 len, i, num_feat = 0, feat_num = 0;
20094 
20095   if (ep == NULL) {
20096     return NULL;
20097   }
20098 
20099   MemSet (feat_present, 0, sizeof (feat_present));
20100   if (ep->has_its1) {
20101     feat_present[eExtractorFeatITS1] = TRUE;
20102     if (StringNCmp (ep->its1_range, "1-", 2) == 0) {
20103       feat_present[eExtractorFeat18S] = FALSE;
20104     } else {
20105       feat_present[eExtractorFeat18S] = TRUE;
20106     }
20107     if (ep->has_its2) {
20108       feat_present[eExtractorFeat58S] = TRUE;
20109       feat_present[eExtractorFeatITS2] = TRUE;
20110       cp = StringChr (ep->its2_range, '-');
20111       if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
20112         feat_present[eExtractorFeat28S] = FALSE;
20113       } else {
20114         feat_present[eExtractorFeat28S] = TRUE;
20115       }
20116     } else {
20117       cp = StringChr (ep->its1_range, '-');
20118       if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
20119         feat_present[eExtractorFeat58S] = FALSE;
20120       } else {
20121         feat_present[eExtractorFeat58S] = TRUE;
20122       }
20123       feat_present[eExtractorFeatITS2] = FALSE;
20124       feat_present[eExtractorFeat28S] = FALSE;
20125     }
20126   } else {
20127     feat_present[eExtractorFeat18S] = FALSE;
20128     feat_present[eExtractorFeatITS1] = FALSE;
20129     if (StringNCmp (ep->its2_range, "1-", 2) == 0) {
20130       feat_present[eExtractorFeat58S] = FALSE;
20131     } else {
20132       feat_present[eExtractorFeat58S] = TRUE;
20133     }
20134     feat_present[eExtractorFeatITS2] = TRUE;
20135     cp = StringChr (ep->its2_range, '-');
20136     if (cp != NULL && StringCmp (cp + 1, "end") == 0) {
20137       feat_present[eExtractorFeat28S] = FALSE;
20138     } else {
20139       feat_present[eExtractorFeat28S] = TRUE;
20140     }
20141   }
20142 
20143   len = 15;
20144   for (i = 0; i < 5; i++) {
20145     if (feat_present[i]) {
20146       len += StringLen (extractor_feature_labels[i]) + 2;
20147       num_feat++;
20148     } else if (num_feat > 0) {
20149       break;
20150     }
20151   }
20152   label = (CharPtr) MemNew (sizeof (Char) * len);
20153   sprintf (label, "contains ");
20154   for (i = 0; i < 5; i++) {
20155     if (feat_present[i]) {
20156       if (feat_num > 0) {
20157         if (feat_num == num_feat - 1) {
20158           if (num_feat == 2) {
20159             StringCat (label, " and ");
20160           } else {
20161             StringCat (label, ", and ");
20162           }
20163         } else {
20164           StringCat (label, ", ");
20165         }
20166       }
20167       StringCat (label, extractor_feature_labels[i]);
20168       feat_num++;
20169     } else if (feat_num > 0) {
20170       break;
20171     }
20172   }
20173   return label;
20174 }
20175 
20176 
20177 //Not used for Autodef or Cleanup
ParseExtractorIdOnly(CharPtr cp,Pointer data)20178 static Boolean ParseExtractorIdOnly (CharPtr cp, Pointer data)
20179 {
20180   RNAFeatListPtr ep;
20181   CharPtr div = NULL, id_start, id_end;
20182   Char    ch_was;
20183 
20184   if (StringHasNoText (cp) || (ep = (RNAFeatListPtr) data) == NULL) {
20185     return FALSE;
20186   }
20187 
20188   id_start = cp;
20189   while (isspace (*id_start)) {
20190     id_start++;
20191   }
20192 
20193   if (*id_start == 0) {
20194     return FALSE;
20195   }
20196 
20197   /* if we have a list of IDs, truncate after just the first one */
20198   id_end = StringChr (id_start, '|');
20199   if (id_end != NULL && id_end < div) {
20200     id_end = StringChr (id_end + 1, '|');
20201     if (id_end != NULL) {
20202       div = id_end;
20203     }
20204   }
20205 
20206   if (div != NULL) {
20207     ch_was = *div;
20208     *div = 0;
20209   }
20210   ep->id = StringSave (id_start);
20211   if (div != NULL) {
20212     *div = ch_was;
20213   }
20214   /* trim spaces from end of ID */
20215   cp = ep->id + StringLen (ep->id) - 1;
20216   while (cp > ep->id && isspace (*cp)) {
20217     cp--;
20218   }
20219   *(cp + 1) = 0;
20220   return TRUE;
20221 }
20222 
20223 //Not used for Autodef or Cleanup
ParseNewComplement(CharPtr cp,Pointer data)20224 static Boolean ParseNewComplement (CharPtr cp, Pointer data)
20225 {
20226   RNAFeatListPtr ep;
20227   Boolean rval = TRUE;
20228 
20229   if ((ep = (RNAFeatListPtr) data) == NULL) {
20230     return FALSE;
20231   }
20232   if (StringHasNoText (cp) || StringCmp (cp, "0") == 0) {
20233     ep->is_complement = FALSE;
20234   } else if (StringCmp (cp, "1") == 0) {
20235     ep->is_complement = TRUE;
20236   } else {
20237     rval = FALSE;
20238   }
20239   return rval;
20240 }
20241 
20242 
20243 static CharPtr sIgnoreRNAErrors[] = {
20244   "Broken or partial sequence, no 5.8S!",
20245   "Broken or partial sequence, only partial 5.8S!",
20246   NULL};
20247 
20248 //Not used for Autodef or Cleanup
ParseRNAError(CharPtr cp,Pointer data)20249 static Boolean ParseRNAError (CharPtr cp, Pointer data)
20250 {
20251   RNAFeatListPtr ep;
20252   Boolean rval = TRUE, ignore = FALSE;
20253   Int4 j;
20254 
20255   if ((ep = (RNAFeatListPtr) data) == NULL) {
20256     return FALSE;
20257   }
20258   if (!StringHasNoText (cp)) {
20259     for (j = 0; sIgnoreRNAErrors[j] != NULL && !ignore; j++) {
20260       if (StringNICmp (cp, sIgnoreRNAErrors[j], StringLen (sIgnoreRNAErrors[j])) == 0) {
20261         ignore = TRUE;
20262       }
20263     }
20264     if (!ignore) {
20265       ep->error = StringSave (cp);
20266     }
20267   }
20268   return rval;
20269 }
20270 
20271 //Not used for Autodef or Cleanup
ParseRNARange(CharPtr cp,Pointer data)20272 static Boolean ParseRNARange (CharPtr cp, Pointer data)
20273 {
20274   RNAFeatListPtr ep;
20275   Boolean rval = TRUE;
20276   CharPtr colon;
20277 
20278   if ((ep = (RNAFeatListPtr) data) == NULL) {
20279     return FALSE;
20280   }
20281   colon = StringChr (cp, ':');
20282   if (colon == NULL) {
20283     return FALSE;
20284   }
20285   colon++;
20286   while (isspace (*colon)) {
20287     colon++;
20288   }
20289   if (StringICmp (colon, "Not found") == 0) {
20290     ep->has_feat[ep->feat_pos] = FALSE;
20291     ep->feat_pos++;
20292   } else if (StringICmp (colon, "No end") == 0) {
20293     ep->has_feat[ep->feat_pos] = TRUE;
20294     ep->feat_pos++;
20295   } else if (StringICmp (colon, "No start") == 0) {
20296     ep->has_feat[ep->feat_pos] = TRUE;
20297     ep->feat_pos++;
20298   } else if (!isdigit (*colon)) {
20299     rval = FALSE;
20300   } else {
20301     ep->has_feat[ep->feat_pos] = TRUE;
20302     ep->feat_pos++;
20303   }
20304   return rval;
20305 }
20306 
20307 
20308 static Nlm_ParseProc new_token_parsers[] = {
20309   ParseExtractorIdOnly,
20310   SkipToken,
20311   ParseRNARange,
20312   ParseRNARange,
20313   ParseRNARange,
20314   ParseRNARange,
20315   ParseRNARange,
20316   ParseRNAError,
20317   ParseNewComplement,
20318   NULL};
20319 
20320 //Not used for Autodef or Cleanup
MakeLabelFromRNAFeatList(RNAFeatListPtr ep)20321 static CharPtr MakeLabelFromRNAFeatList (RNAFeatListPtr ep)
20322 {
20323   CharPtr label;
20324   Int4 len, i, num_feat = 0, feat_num = 0;
20325 
20326   if (ep == NULL) {
20327     return NULL;
20328   }
20329 
20330   len = 15;
20331   for (i = 0; i < 5; i++) {
20332     if (ep->has_feat[i]) {
20333       len += StringLen (extractor_feature_labels[i]) + 2;
20334       num_feat++;
20335     } else if (num_feat > 0) {
20336       break;
20337     }
20338   }
20339   label = (CharPtr) MemNew (sizeof (Char) * len);
20340   sprintf (label, "contains ");
20341   for (i = 0; i < 5; i++) {
20342     if (ep->has_feat[i]) {
20343       if (feat_num > 0) {
20344         if (feat_num == num_feat - 1) {
20345           if (num_feat == 2) {
20346             StringCat (label, " and ");
20347           } else {
20348             StringCat (label, ", and ");
20349           }
20350         } else {
20351           StringCat (label, ", ");
20352         }
20353       }
20354       StringCat (label, extractor_feature_labels[i]);
20355       feat_num++;
20356     } else if (feat_num > 0) {
20357       break;
20358     }
20359   }
20360   return label;
20361 }
20362 
20363 
20364 //Not used for Autodef or Cleanup
RevCompFeats(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)20365 NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
20366 
20367 {
20368   BioseqPtr     bsp;
20369   BioseqSetPtr  bssp;
20370   SeqAnnotPtr   sap;
20371   SeqFeatPtr    sfp;
20372 
20373   if (mydata == NULL) return;
20374   if (sep == NULL || sep->data.ptrvalue == NULL) return;
20375   if (IS_Bioseq (sep)) {
20376     bsp = (BioseqPtr) sep->data.ptrvalue;
20377     sap = bsp->annot;
20378   } else if (IS_Bioseq_set (sep)) {
20379     bssp = (BioseqSetPtr) sep->data.ptrvalue;
20380     sap = bssp->annot;
20381   } else return;
20382   bsp = (BioseqPtr) mydata;
20383   if (bsp == NULL) return;
20384   if (! ISA_na (bsp->mol)) return;
20385   while (sap != NULL) {
20386     if (sap->type == 1) {
20387       sfp = (SeqFeatPtr) sap->data;
20388       while (sfp != NULL) {
20389         RevCompOneFeatForBioseq (sfp, bsp);
20390         sfp = sfp->next;
20391       }
20392     }
20393     sap = sap->next;
20394   }
20395 }
20396 
20397 //Not used for Autodef or Cleanup
GetBioseqFromExtractorTextId(CharPtr id,CharPtr line,SeqEntryPtr sep)20398 static BioseqPtr GetBioseqFromExtractorTextId (CharPtr id, CharPtr line, SeqEntryPtr sep)
20399 {
20400   Int4 len;
20401   BioseqPtr bsp;
20402   SeqIdPtr  sip;
20403 
20404   if (StringHasNoText (id)) {
20405     Message (MSG_POSTERR, "No id for line %s", line);
20406     return NULL;
20407   }
20408   /* figure out ID */
20409   len = StringLen (id);
20410   if (len > 3 && id[len - 1] == '.' && id[len - 2] == '.' && id[len - 3] == '.') {
20411     Message (MSG_POSTERR, "ID was truncated for line %s", line);
20412     return NULL;
20413   }
20414   sip = CreateSeqIdFromText (id, sep);
20415   bsp = BioseqFind (sip);
20416   sip = SeqIdFree (sip);
20417   if (bsp == NULL) {
20418     Message (MSG_POSTERR, "ID for sequence not present in record in line %s", line);
20419     return NULL;
20420   }
20421   return bsp;
20422 }
20423 
20424 
20425 //Not used for Autodef or Cleanup
MakeMiscRNAWithLabel(BioseqPtr bsp,CharPtr label)20426 static SeqFeatPtr MakeMiscRNAWithLabel(BioseqPtr bsp, CharPtr label)
20427 {
20428   SeqFeatPtr sfp;
20429   RnaRefPtr rrp;
20430   RNAGenPtr rgp;
20431 
20432   /* make feature and attach to appropriate annots */
20433   sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL);
20434   rrp = RnaRefNew ();
20435   sfp->data.value.ptrvalue = rrp;
20436   rrp->type = 255;
20437   rgp = RNAGenNew ();
20438   rrp->ext.choice = 3;
20439   rrp->ext.value.ptrvalue = rgp;
20440   sfp->comment = StringSave(label);
20441   SetSeqLocPartial (sfp->location, TRUE, TRUE);
20442   return sfp;
20443 }
20444 
20445 //Not used for Autodef or Cleanup
ParseExtractorResultRowToFeatures(CharPtr line,SeqEntryPtr sep)20446 static SeqFeatPtr ParseExtractorResultRowToFeatures (CharPtr line, SeqEntryPtr sep)
20447 {
20448   ExtractorInfoPtr ep;
20449   SeqFeatPtr sfp = NULL;
20450   CharPtr  label;
20451   BioseqPtr bsp;
20452 
20453   if (StringHasNoText (line)) {
20454     return NULL;
20455   }
20456 
20457   ep = ExtractorInfoNew ();
20458   if (!ParseLineOfTokens(line, token_parsers, ep)) {
20459     ep = ExtractorInfoFree (ep);
20460     Message (MSG_POSTERR, "Unable to parse extractor line %s", line);
20461     return NULL;
20462   }
20463   if (!ep->has_its1 && !ep->has_its2) {
20464     ep = ExtractorInfoFree (ep);
20465     Message (MSG_POSTERR, "Unable to determine feature list for line %s", line);
20466     return NULL;
20467   }
20468 
20469   /* figure out ID */
20470   bsp = GetBioseqFromExtractorTextId(ep->id, line, sep);
20471   if (bsp == NULL) {
20472     ep = ExtractorInfoFree (ep);
20473     return NULL;
20474   }
20475 
20476   /* calculate label */
20477   label = MakeLabelFromExtractorInfo(ep);
20478 
20479   if (ep->is_complement) {
20480     BioseqRevComp (bsp);
20481     SeqEntryExplore (sep, (Pointer) bsp, RevCompFeats);
20482   }
20483 
20484   /* make feature and attach to appropriate annots */
20485   sfp = MakeMiscRNAWithLabel(bsp, label);
20486   label = MemFree (label);
20487 
20488   ep = ExtractorInfoFree (ep);
20489   return sfp;
20490 }
20491 
20492 //Not used for Autodef or Cleanup
ParseExtractorResultsTableToFeatures(FILE * fp,SeqEntryPtr sep)20493 NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep)
20494 {
20495   ReadBufferData  rbd;
20496   CharPtr         line;
20497 
20498   rbd.fp = fp;
20499   rbd.current_data = NULL;
20500   line = AbstractReadFunction (&rbd);
20501   while (line != NULL && line[0] != EOF) {
20502     /* TODO: skip intro lines */
20503     ParseExtractorResultRowToFeatures(line, sep);
20504     line = MemFree (line);
20505     line = AbstractReadFunction (&rbd);
20506   }
20507 }
20508 
20509 //Not used for Autodef or Cleanup
ParseRNAFeatListRowToFeatures(CharPtr line,SeqEntryPtr sep,LogInfoPtr lip)20510 static SeqFeatPtr ParseRNAFeatListRowToFeatures (CharPtr line, SeqEntryPtr sep, LogInfoPtr lip)
20511 {
20512   RNAFeatListPtr ep;
20513   SeqFeatPtr sfp = NULL;
20514   CharPtr  label;
20515   BioseqPtr bsp;
20516 
20517   if (StringHasNoText (line)) {
20518     return NULL;
20519   }
20520 
20521   ep = RNAFeatListNew ();
20522   if (!ParseLineOfTokens(line, new_token_parsers, ep)) {
20523     ep = RNAFeatListFree (ep);
20524     if (lip == NULL) {
20525       Message (MSG_POSTERR, "Unable to parse extractor line %s", line);
20526     } else {
20527       if (lip->fp != NULL) {
20528         fprintf (lip->fp, "Unable to parse extractor line %s\n", line);
20529       }
20530       lip->data_in_log = TRUE;
20531     }
20532     return NULL;
20533   }
20534   if (ep->error != NULL) {
20535     if (lip == NULL) {
20536       Message (MSG_POSTERR, "Error scanning for feature on %s: %s", ep->id, ep->error);
20537       ep = RNAFeatListFree (ep);
20538     } else {
20539       if (lip->fp != NULL) {
20540         fprintf (lip->fp, "Error scanning for feature on %s: %s\n", ep->id, ep->error);
20541       }
20542       lip->data_in_log = TRUE;
20543     }
20544     return NULL;
20545   }
20546 
20547   /* figure out ID */
20548   bsp = GetBioseqFromExtractorTextId(ep->id, line, sep);
20549   if (bsp == NULL) {
20550     ep = RNAFeatListFree (ep);
20551     return NULL;
20552   }
20553 
20554   /* calculate label */
20555   label = MakeLabelFromRNAFeatList(ep);
20556 
20557   if (ep->is_complement) {
20558     BioseqRevComp (bsp);
20559     SeqEntryExplore (sep, (Pointer) bsp, RevCompFeats);
20560   }
20561 
20562   /* make feature and attach to appropriate annots */
20563   sfp = MakeMiscRNAWithLabel(bsp, label);
20564   label = MemFree (label);
20565 
20566   ep = RNAFeatListFree (ep);
20567   return sfp;
20568 }
20569 
20570 //Not used for Autodef or Cleanup
ParseRNAFeatListTableToFeatures(FILE * fp,SeqEntryPtr sep,LogInfoPtr lip)20571 NLM_EXTERN void ParseRNAFeatListTableToFeatures (FILE *fp, SeqEntryPtr sep, LogInfoPtr lip)
20572 {
20573   ReadBufferData  rbd;
20574   CharPtr         line;
20575 
20576   rbd.fp = fp;
20577   rbd.current_data = NULL;
20578   line = AbstractReadFunction (&rbd);
20579   while (line != NULL && line[0] != EOF) {
20580     /* TODO: skip intro lines */
20581     ParseRNAFeatListRowToFeatures(line, sep, lip);
20582     line = MemFree (line);
20583     line = AbstractReadFunction (&rbd);
20584   }
20585 }
20586 
20587 extern CharPtr latlon_onedegree [];
20588 CharPtr latlon_onedegree [] = {
20589   "1",
20590   "Afghanistan",
20591   "\t39\t69\t72",
20592   "\t38\t63\t75",
20593   "\t37\t62\t75",
20594   "\t36\t60\t75",
20595   "\t35\t59\t75",
20596   "\t34\t59\t72",
20597   "\t33\t59\t72",
20598   "\t32\t59\t71",
20599   "\t31\t59\t70",
20600   "\t30\t59\t70",
20601   "\t29\t59\t67",
20602   "\t28\t59\t67",
20603   "Albania",
20604   "\t43\t18\t21",
20605   "\t42\t18\t21",
20606   "\t41\t18\t22",
20607   "\t40\t18\t22",
20608   "\t39\t18\t22",
20609   "\t38\t18\t21",
20610   "Algeria",
20611   "\t38\t5\t8",
20612   "\t37\t-1\t9",
20613   "\t36\t-3\t9",
20614   "\t35\t-3\t9",
20615   "\t34\t-3\t9",
20616   "\t33\t-3\t10",
20617   "\t32\t-4\t10",
20618   "\t31\t-6\t10",
20619   "\t30\t-9\t10",
20620   "\t29\t-9\t10",
20621   "\t28\t-9\t10",
20622   "\t27\t-9\t10",
20623   "\t26\t-9\t11",
20624   "\t25\t-9\t12",
20625   "\t24\t-7\t12",
20626   "\t23\t-5\t12",
20627   "\t22\t-4\t12",
20628   "\t21\t-2\t12",
20629   "\t20\t-1\t10",
20630   "\t19\t0\t8",
20631   "\t18\t1\t7",
20632   "\t17\t2\t4",
20633   "American Samoa",
20634   "\t-10\t-172\t-170",
20635   "\t-11\t-172\t-170",
20636   "\t-12\t-172\t-170",
20637   "\t-13\t-171\t-167",
20638   "\t-14\t-171\t-167",
20639   "\t-15\t-171\t-167",
20640   "Andorra",
20641   "\t43\t0\t2",
20642   "\t42\t0\t2",
20643   "\t41\t0\t2",
20644   "Angola",
20645   "\t-3\t11\t14",
20646   "\t-4\t11\t17",
20647   "\t-5\t11\t17\t19\t21",
20648   "\t-6\t11\t22",
20649   "\t-7\t11\t22",
20650   "\t-8\t11\t23",
20651   "\t-9\t11\t25",
20652   "\t-10\t11\t25",
20653   "\t-11\t11\t25",
20654   "\t-12\t11\t25",
20655   "\t-13\t11\t25",
20656   "\t-14\t10\t25",
20657   "\t-15\t10\t23",
20658   "\t-16\t10\t24",
20659   "\t-17\t10\t24",
20660   "\t-18\t10\t24",
20661   "\t-19\t19\t22",
20662   "Anguilla",
20663   "\t19\t-64\t-61",
20664   "\t18\t-64\t-61",
20665   "\t17\t-64\t-61",
20666   "Antarctica",
20667   "\t-59\t-47\t-43",
20668   "\t-60\t-59\t-53\t-47\t-43",
20669   "\t-61\t-63\t-53\t-47\t-43",
20670   "\t-62\t-63\t-53",
20671   "\t-63\t-65\t-54",
20672   "\t-64\t-67\t-54\t51\t56\t91\t93\t99\t104\t110\t114",
20673   "\t-65\t-69\t-55\t47\t58\t84\t117\t119\t144\t161\t164",
20674   "\t-66\t-70\t-58\t42\t70\t79\t147\t161\t165",
20675   "\t-67\t-91\t-89\t-73\t-59\t31\t35\t38\t71\t76\t156\t161\t165",
20676   "\t-68\t-91\t-89\t-76\t-59\t14\t17\t31\t161\t163\t165",
20677   "\t-69\t-91\t-89\t-77\t-59\t-11\t168",
20678   "\t-70\t-103\t-95\t-77\t-59\t-13\t171",
20679   "\t-71\t-106\t-87\t-81\t-79\t-77\t-58\t-17\t171",
20680   "\t-72\t-128\t-112\t-106\t-58\t-22\t-19\t-17\t171",
20681   "\t-73\t-137\t-109\t-106\t-58\t-23\t171",
20682   "\t-74\t-147\t-58\t-27\t170",
20683   "\t-75\t-151\t-59\t-32\t167",
20684   "\t-76\t-159\t-62\t-50\t-43\t-36\t170",
20685   "\t-77\t-165\t-65\t-51\t-42\t-37\t170",
20686   "\t-78\t-165\t-64\t-62\t-58\t-52\t-41\t-37\t170",
20687   "\t-79\t-165\t-58\t-55\t168",
20688   "\t-80\t-165\t-58\t-55\t164",
20689   "\t-81\t-175\t-169\t-164\t169",
20690   "\t-82\t-175\t177",
20691   "\t-83\t-180\t180",
20692   "\t-84\t-180\t180",
20693   "\t-85\t-180\t180",
20694   "\t-86\t-180\t180",
20695   "\t-87\t-180\t180",
20696   "\t-88\t-180\t180",
20697   "\t-89\t-180\t180",
20698   "\t-90\t-180\t180",
20699   "\t-90\t-180\t180",
20700   "Antigua and Barbuda",
20701   "\t18\t-62\t-60",
20702   "\t17\t-62\t-60",
20703   "\t16\t-62\t-60",
20704   "\t15\t-62\t-60",
20705   "Argentina",
20706   "\t-20\t-67\t-61",
20707   "\t-21\t-68\t-60",
20708   "\t-22\t-68\t-59",
20709   "\t-23\t-69\t-57",
20710   "\t-24\t-69\t-52",
20711   "\t-25\t-69\t-52",
20712   "\t-26\t-70\t-52",
20713   "\t-27\t-70\t-52",
20714   "\t-28\t-71\t-52",
20715   "\t-29\t-71\t-54",
20716   "\t-30\t-71\t-55",
20717   "\t-31\t-71\t-56",
20718   "\t-32\t-71\t-56",
20719   "\t-33\t-71\t-56",
20720   "\t-34\t-71\t-56",
20721   "\t-35\t-72\t-55",
20722   "\t-36\t-72\t-55",
20723   "\t-37\t-72\t-55",
20724   "\t-38\t-72\t-55",
20725   "\t-39\t-72\t-56",
20726   "\t-40\t-72\t-60",
20727   "\t-41\t-73\t-61",
20728   "\t-42\t-73\t-61",
20729   "\t-43\t-73\t-62",
20730   "\t-44\t-73\t-63",
20731   "\t-45\t-73\t-64",
20732   "\t-46\t-73\t-64",
20733   "\t-47\t-74\t-64",
20734   "\t-48\t-74\t-64",
20735   "\t-49\t-74\t-64",
20736   "\t-50\t-74\t-66",
20737   "\t-51\t-74\t-66",
20738   "\t-52\t-73\t-66",
20739   "\t-53\t-71\t-62",
20740   "\t-54\t-69\t-62",
20741   "\t-55\t-69\t-62",
20742   "\t-56\t-67\t-65",
20743   "Armenia",
20744   "\t42\t42\t46",
20745   "\t41\t42\t46",
20746   "\t40\t42\t47",
20747   "\t39\t42\t47",
20748   "\t38\t43\t47",
20749   "\t37\t45\t47",
20750   "Aruba",
20751   "\t13\t-71\t-68",
20752   "\t12\t-71\t-68",
20753   "\t11\t-71\t-68",
20754   "Ashmore and Cartier Islands",
20755   "\t-11\t122\t124",
20756   "\t-12\t122\t124",
20757   "\t-13\t122\t124",
20758   "Australia",
20759   "\t-8\t141\t143",
20760   "\t-9\t131\t133\t141\t143",
20761   "\t-10\t129\t137\t140\t144",
20762   "\t-11\t129\t137\t140\t144",
20763   "\t-12\t124\t137\t140\t144",
20764   "\t-13\t123\t137\t140\t146",
20765   "\t-14\t123\t138\t140\t146",
20766   "\t-15\t121\t146",
20767   "\t-16\t121\t147",
20768   "\t-17\t120\t147",
20769   "\t-18\t118\t149",
20770   "\t-19\t114\t150",
20771   "\t-20\t112\t151",
20772   "\t-21\t112\t151",
20773   "\t-22\t112\t152",
20774   "\t-23\t112\t154",
20775   "\t-24\t111\t154",
20776   "\t-25\t111\t154",
20777   "\t-26\t111\t154",
20778   "\t-27\t112\t154",
20779   "\t-28\t112\t154",
20780   "\t-29\t112\t154",
20781   "\t-30\t113\t154\t158\t160",
20782   "\t-31\t113\t154\t158\t160",
20783   "\t-32\t113\t154\t158\t160",
20784   "\t-33\t113\t129\t131\t153",
20785   "\t-34\t113\t125\t133\t152",
20786   "\t-35\t113\t124\t134\t152",
20787   "\t-36\t115\t119\t134\t151",
20788   "\t-37\t135\t151",
20789   "\t-38\t138\t151",
20790   "\t-39\t139\t149",
20791   "\t-40\t142\t149",
20792   "\t-41\t142\t149",
20793   "\t-42\t143\t149",
20794   "\t-43\t144\t149",
20795   "\t-44\t144\t149",
20796   "\t-53\t157\t159",
20797   "\t-54\t157\t159",
20798   "\t-55\t157\t159",
20799   "Australia: Australian Capital Territory",
20800   "\t-34\t147\t150",
20801   "\t-35\t147\t150",
20802   "\t-36\t147\t150",
20803   "Australia: Jervis Bay Territory",
20804   "\t-34\t149\t151",
20805   "\t-35\t149\t151",
20806   "\t-36\t149\t151",
20807   "Australia: New South Wales",
20808   "\t-27\t147\t154",
20809   "\t-28\t140\t154",
20810   "\t-29\t140\t154",
20811   "\t-30\t140\t154",
20812   "\t-31\t140\t154",
20813   "\t-32\t140\t154",
20814   "\t-33\t140\t153",
20815   "\t-34\t140\t152",
20816   "\t-35\t140\t152",
20817   "\t-36\t142\t151",
20818   "\t-37\t143\t151",
20819   "\t-38\t147\t151",
20820   "Australia: Northern Territory",
20821   "\t-9\t131\t133",
20822   "\t-10\t129\t137",
20823   "\t-11\t129\t137",
20824   "\t-12\t128\t137",
20825   "\t-13\t128\t137",
20826   "\t-14\t128\t138",
20827   "\t-15\t128\t139",
20828   "\t-16\t128\t139",
20829   "\t-17\t128\t139",
20830   "\t-18\t128\t139",
20831   "\t-19\t128\t139",
20832   "\t-20\t128\t139",
20833   "\t-21\t128\t139",
20834   "\t-22\t128\t139",
20835   "\t-23\t128\t139",
20836   "\t-24\t128\t139",
20837   "\t-25\t128\t139",
20838   "\t-26\t128\t139",
20839   "\t-27\t128\t139",
20840   "Australia: Queensland",
20841   "\t-8\t141\t145",
20842   "\t-9\t141\t145",
20843   "\t-10\t140\t145",
20844   "\t-11\t140\t145",
20845   "\t-12\t140\t145",
20846   "\t-13\t140\t146",
20847   "\t-14\t140\t146",
20848   "\t-15\t137\t147",
20849   "\t-16\t137\t147",
20850   "\t-17\t137\t147",
20851   "\t-18\t137\t149",
20852   "\t-19\t137\t150",
20853   "\t-20\t137\t151",
20854   "\t-21\t137\t151",
20855   "\t-22\t137\t152",
20856   "\t-23\t137\t154",
20857   "\t-24\t137\t154",
20858   "\t-25\t137\t154",
20859   "\t-26\t137\t154",
20860   "\t-27\t137\t154",
20861   "\t-28\t140\t154",
20862   "\t-29\t140\t154",
20863   "\t-30\t140\t152",
20864   "Australia: South Australia",
20865   "\t-25\t128\t142",
20866   "\t-26\t128\t142",
20867   "\t-27\t128\t142",
20868   "\t-28\t128\t142",
20869   "\t-29\t128\t142",
20870   "\t-30\t128\t142",
20871   "\t-31\t128\t142",
20872   "\t-32\t128\t142",
20873   "\t-33\t131\t142",
20874   "\t-34\t133\t142",
20875   "\t-35\t134\t142",
20876   "\t-36\t134\t141",
20877   "\t-37\t135\t141",
20878   "\t-38\t138\t141",
20879   "\t-39\t139\t141",
20880   "Australia: Tasmania",
20881   "\t-38\t142\t149",
20882   "\t-39\t142\t149",
20883   "\t-40\t142\t149",
20884   "\t-41\t142\t149",
20885   "\t-42\t143\t149",
20886   "\t-43\t144\t149",
20887   "\t-44\t144\t149",
20888   "Australia: Victoria",
20889   "\t-32\t139\t141",
20890   "\t-33\t139\t144",
20891   "\t-34\t139\t148",
20892   "\t-35\t139\t149",
20893   "\t-36\t139\t150",
20894   "\t-37\t139\t150",
20895   "\t-38\t139\t150",
20896   "\t-39\t139\t148",
20897   "\t-40\t145\t147",
20898   "Australia: Western Australia",
20899   "\t-12\t124\t128",
20900   "\t-13\t123\t130",
20901   "\t-14\t122\t130",
20902   "\t-15\t121\t130",
20903   "\t-16\t121\t130",
20904   "\t-17\t120\t130",
20905   "\t-18\t118\t130",
20906   "\t-19\t114\t130",
20907   "\t-20\t112\t130",
20908   "\t-21\t112\t130",
20909   "\t-22\t112\t130",
20910   "\t-23\t112\t130",
20911   "\t-24\t111\t130",
20912   "\t-25\t111\t130",
20913   "\t-26\t111\t130",
20914   "\t-27\t112\t130",
20915   "\t-28\t112\t130",
20916   "\t-29\t112\t130",
20917   "\t-30\t113\t130",
20918   "\t-31\t113\t130",
20919   "\t-32\t113\t130",
20920   "\t-33\t113\t129",
20921   "\t-34\t113\t125",
20922   "\t-35\t113\t124",
20923   "\t-36\t115\t119",
20924   "Austria",
20925   "\t50\t13\t16",
20926   "\t49\t11\t18",
20927   "\t48\t8\t18",
20928   "\t47\t8\t18",
20929   "\t46\t8\t18",
20930   "\t45\t8\t17",
20931   "Azerbaijan",
20932   "\t42\t43\t50",
20933   "\t41\t43\t51",
20934   "\t40\t43\t51",
20935   "\t39\t43\t51",
20936   "\t38\t43\t50",
20937   "\t37\t44\t50",
20938   "Bahamas",
20939   "\t27\t-79\t-76",
20940   "\t26\t-80\t-75",
20941   "\t25\t-80\t-73",
20942   "\t24\t-80\t-72",
20943   "\t23\t-80\t-71",
20944   "\t22\t-80\t-71",
20945   "\t21\t-76\t-71",
20946   "\t20\t-74\t-71",
20947   "\t19\t-74\t-72",
20948   "Bahrain",
20949   "\t27\t49\t51",
20950   "\t26\t49\t51",
20951   "\t25\t49\t51",
20952   "\t24\t49\t51",
20953   "Baker Island",
20954   "\t1\t-177\t-175",
20955   "\t0\t-177\t-175",
20956   "\t-1\t-177\t-175",
20957   "Bangladesh",
20958   "\t27\t87\t90",
20959   "\t26\t87\t93",
20960   "\t25\t87\t93",
20961   "\t24\t87\t93",
20962   "\t23\t87\t93",
20963   "\t22\t87\t93",
20964   "\t21\t87\t93",
20965   "\t20\t88\t93",
20966   "\t19\t91\t93",
20967   "Barbados",
20968   "\t14\t-60\t-58",
20969   "\t13\t-60\t-58",
20970   "\t12\t-60\t-58",
20971   "Bassas da India",
20972   "\t-20\t38\t40",
20973   "\t-21\t38\t40",
20974   "\t-22\t38\t40",
20975   "Belarus",
20976   "\t57\t26\t30",
20977   "\t56\t25\t32",
20978   "\t55\t23\t32",
20979   "\t54\t22\t33",
20980   "\t53\t22\t33",
20981   "\t52\t22\t33",
20982   "\t51\t22\t32",
20983   "\t50\t22\t31",
20984   "Belgium",
20985   "\t52\t1\t6",
20986   "\t51\t1\t7",
20987   "\t50\t1\t7",
20988   "\t49\t1\t7",
20989   "\t48\t3\t6",
20990   "Belize",
20991   "\t19\t-90\t-86",
20992   "\t18\t-90\t-86",
20993   "\t17\t-90\t-86",
20994   "\t16\t-90\t-86",
20995   "\t15\t-90\t-87",
20996   "\t14\t-90\t-87",
20997   "Benin",
20998   "\t13\t1\t4",
20999   "\t12\t-1\t4",
21000   "\t11\t-1\t4",
21001   "\t10\t-1\t4",
21002   "\t9\t-1\t4",
21003   "\t8\t0\t4",
21004   "\t7\t0\t3",
21005   "\t6\t0\t3",
21006   "\t5\t0\t3",
21007   "Bermuda",
21008   "\t33\t-65\t-63",
21009   "\t32\t-65\t-63",
21010   "\t31\t-65\t-63",
21011   "Bhutan",
21012   "\t29\t88\t92",
21013   "\t28\t87\t93",
21014   "\t27\t87\t93",
21015   "\t26\t87\t93",
21016   "\t25\t87\t93",
21017   "Bolivia",
21018   "\t-8\t-67\t-64",
21019   "\t-9\t-70\t-64",
21020   "\t-10\t-70\t-63",
21021   "\t-11\t-70\t-61",
21022   "\t-12\t-70\t-59",
21023   "\t-13\t-70\t-59",
21024   "\t-14\t-70\t-59",
21025   "\t-15\t-70\t-57",
21026   "\t-16\t-70\t-56",
21027   "\t-17\t-70\t-56",
21028   "\t-18\t-70\t-56",
21029   "\t-19\t-70\t-56",
21030   "\t-20\t-69\t-56",
21031   "\t-21\t-69\t-56",
21032   "\t-22\t-69\t-61",
21033   "\t-23\t-69\t-61",
21034   "Borneo",
21035   "\t6\t113\t116",
21036   "\t5\t112\t116",
21037   "\t4\t112\t116",
21038   "\t3\t112\t116",
21039   "Borneo",
21040   "\t5\t114\t118",
21041   "\t4\t107\t109\t114\t118",
21042   "\t3\t107\t110\t113\t119",
21043   "\t2\t107\t120",
21044   "\t1\t107\t120",
21045   "\t0\t107\t120",
21046   "\t-1\t107\t120",
21047   "\t-2\t107\t118",
21048   "\t-3\t109\t117",
21049   "\t-4\t109\t117",
21050   "\t-5\t113\t117",
21051   "Borneo",
21052   "\t8\t115\t118",
21053   "\t7\t115\t119",
21054   "\t6\t114\t120",
21055   "\t5\t112\t120",
21056   "\t4\t111\t120",
21057   "\t3\t108\t119",
21058   "\t2\t108\t116",
21059   "\t1\t108\t116",
21060   "\t0\t108\t115",
21061   "\t-1\t109\t112",
21062   "Bosnia and Herzegovina",
21063   "\t46\t14\t19",
21064   "\t45\t14\t20",
21065   "\t44\t14\t20",
21066   "\t43\t14\t20",
21067   "\t42\t15\t20",
21068   "\t41\t16\t19",
21069   "Botswana",
21070   "\t-16\t22\t26",
21071   "\t-17\t19\t26",
21072   "\t-18\t19\t27",
21073   "\t-19\t19\t28",
21074   "\t-20\t19\t30",
21075   "\t-21\t18\t30",
21076   "\t-22\t18\t30",
21077   "\t-23\t18\t30",
21078   "\t-24\t18\t28",
21079   "\t-25\t18\t27",
21080   "\t-26\t19\t26",
21081   "\t-27\t19\t23",
21082   "Bouvet Island",
21083   "\t-53\t2\t4",
21084   "\t-54\t2\t4",
21085   "\t-55\t2\t4",
21086   "Brazil",
21087   "\t6\t-61\t-58",
21088   "\t5\t-65\t-58\t-52\t-50",
21089   "\t4\t-65\t-58\t-53\t-49",
21090   "\t3\t-69\t-49",
21091   "\t2\t-70\t-48",
21092   "\t1\t-71\t-45\t-30\t-28",
21093   "\t0\t-71\t-43\t-30\t-28",
21094   "\t-1\t-71\t-38\t-30\t-28",
21095   "\t-2\t-70\t-37\t-33\t-31",
21096   "\t-3\t-73\t-35\t-33\t-31",
21097   "\t-4\t-74\t-31",
21098   "\t-5\t-74\t-33",
21099   "\t-6\t-75\t-33",
21100   "\t-7\t-75\t-33",
21101   "\t-8\t-75\t-33",
21102   "\t-9\t-74\t-33",
21103   "\t-10\t-74\t-34",
21104   "\t-11\t-73\t-35",
21105   "\t-12\t-71\t-36",
21106   "\t-13\t-65\t-36",
21107   "\t-14\t-63\t-37",
21108   "\t-15\t-61\t-37",
21109   "\t-16\t-61\t-37",
21110   "\t-17\t-61\t-37",
21111   "\t-18\t-59\t-38",
21112   "\t-19\t-59\t-38\t-30\t-27",
21113   "\t-20\t-59\t-38\t-30\t-27",
21114   "\t-21\t-59\t-39\t-30\t-27",
21115   "\t-22\t-58\t-39",
21116   "\t-23\t-58\t-39",
21117   "\t-24\t-56\t-42",
21118   "\t-25\t-56\t-45",
21119   "\t-26\t-56\t-46",
21120   "\t-27\t-57\t-47",
21121   "\t-28\t-58\t-47",
21122   "\t-29\t-58\t-47",
21123   "\t-30\t-58\t-48",
21124   "\t-31\t-58\t-49",
21125   "\t-32\t-57\t-49",
21126   "\t-33\t-54\t-51",
21127   "\t-34\t-54\t-51",
21128   "British Indian Ocean Territory",
21129   "\t-4\t70\t73",
21130   "\t-5\t70\t73",
21131   "\t-6\t70\t73",
21132   "\t-7\t70\t73",
21133   "\t-8\t71\t73",
21134   "British Virgin Islands",
21135   "\t19\t-65\t-63",
21136   "\t18\t-65\t-63",
21137   "\t17\t-65\t-63",
21138   "Brunei",
21139   "\t6\t113\t116",
21140   "\t5\t112\t116",
21141   "\t4\t112\t116",
21142   "\t3\t112\t116",
21143   "Bulgaria",
21144   "\t45\t21\t28",
21145   "\t44\t21\t29",
21146   "\t43\t21\t29",
21147   "\t42\t21\t29",
21148   "\t41\t21\t29",
21149   "\t40\t21\t29",
21150   "Burkina Faso",
21151   "\t16\t-1\t1",
21152   "\t15\t-3\t1",
21153   "\t14\t-5\t2",
21154   "\t13\t-5\t3",
21155   "\t12\t-6\t3",
21156   "\t11\t-6\t3",
21157   "\t10\t-6\t3",
21158   "\t9\t-6\t1",
21159   "\t8\t-5\t-1",
21160   "Burundi",
21161   "\t-1\t27\t31",
21162   "\t-2\t27\t31",
21163   "\t-3\t27\t31",
21164   "\t-4\t28\t31",
21165   "\t-5\t28\t31",
21166   "Cambodia",
21167   "\t15\t101\t108",
21168   "\t14\t101\t108",
21169   "\t13\t101\t108",
21170   "\t12\t101\t108",
21171   "\t11\t101\t108",
21172   "\t10\t101\t107",
21173   "\t9\t102\t107",
21174   "Cameroon",
21175   "\t14\t13\t15",
21176   "\t13\t13\t16",
21177   "\t12\t12\t16",
21178   "\t11\t12\t16",
21179   "\t10\t11\t16",
21180   "\t9\t11\t16",
21181   "\t8\t9\t16",
21182   "\t7\t8\t16",
21183   "\t6\t7\t16",
21184   "\t5\t7\t16",
21185   "\t4\t7\t16",
21186   "\t3\t7\t17",
21187   "\t2\t8\t17",
21188   "\t1\t8\t17",
21189   "\t0\t14\t17",
21190   "Canada",
21191   "\t84\t-78\t-67",
21192   "\t83\t-90\t-60",
21193   "\t82\t-96\t-60",
21194   "\t81\t-101\t-60",
21195   "\t80\t-106\t-61",
21196   "\t79\t-115\t-108\t-106\t-66",
21197   "\t78\t-121\t-69",
21198   "\t77\t-124\t-73",
21199   "\t76\t-124\t-74",
21200   "\t75\t-125\t-76",
21201   "\t74\t-125\t-75",
21202   "\t73\t-126\t-73",
21203   "\t72\t-126\t-69",
21204   "\t71\t-132\t-66",
21205   "\t70\t-142\t-65",
21206   "\t69\t-142\t-63",
21207   "\t68\t-142\t-61",
21208   "\t67\t-142\t-60",
21209   "\t66\t-142\t-60",
21210   "\t65\t-142\t-60",
21211   "\t64\t-142\t-61",
21212   "\t63\t-142\t-62",
21213   "\t62\t-142\t-63",
21214   "\t61\t-142\t-89\t-84\t-63",
21215   "\t60\t-142\t-91\t-81\t-62",
21216   "\t59\t-142\t-91\t-81\t-61",
21217   "\t58\t-140\t-88\t-81\t-60",
21218   "\t57\t-138\t-86\t-81\t-59",
21219   "\t56\t-133\t-57",
21220   "\t55\t-134\t-56",
21221   "\t54\t-134\t-54",
21222   "\t53\t-134\t-54",
21223   "\t52\t-134\t-54",
21224   "\t51\t-133\t-54",
21225   "\t50\t-132\t-52",
21226   "\t49\t-129\t-51",
21227   "\t48\t-128\t-51",
21228   "\t47\t-126\t-51",
21229   "\t46\t-90\t-51",
21230   "\t45\t-86\t-58\t-56\t-51",
21231   "\t44\t-84\t-58",
21232   "\t43\t-84\t-73\t-67\t-58",
21233   "\t42\t-84\t-75\t-67\t-63\t-61\t-58",
21234   "\t41\t-84\t-77",
21235   "\t40\t-84\t-80",
21236   "Canada: Alberta",
21237   "\t61\t-121\t-109",
21238   "\t60\t-121\t-109",
21239   "\t59\t-121\t-109",
21240   "\t58\t-121\t-109",
21241   "\t57\t-121\t-109",
21242   "\t56\t-121\t-109",
21243   "\t55\t-121\t-109",
21244   "\t54\t-121\t-109",
21245   "\t53\t-121\t-109",
21246   "\t52\t-121\t-109",
21247   "\t51\t-119\t-109",
21248   "\t50\t-118\t-109",
21249   "\t49\t-116\t-109",
21250   "\t48\t-115\t-109",
21251   "\t47\t-115\t-109",
21252   "Canada: British Columbia",
21253   "\t61\t-140\t-119",
21254   "\t60\t-140\t-119",
21255   "\t59\t-140\t-119",
21256   "\t58\t-140\t-119",
21257   "\t57\t-138\t-119",
21258   "\t56\t-133\t-119",
21259   "\t55\t-134\t-119",
21260   "\t54\t-134\t-117",
21261   "\t53\t-134\t-116",
21262   "\t52\t-134\t-114",
21263   "\t51\t-133\t-113",
21264   "\t50\t-132\t-113",
21265   "\t49\t-129\t-113",
21266   "\t48\t-128\t-113",
21267   "\t47\t-126\t-113",
21268   "Canada: Manitoba",
21269   "\t61\t-103\t-93",
21270   "\t60\t-103\t-93",
21271   "\t59\t-103\t-91",
21272   "\t58\t-103\t-88",
21273   "\t57\t-103\t-87",
21274   "\t56\t-103\t-87",
21275   "\t55\t-103\t-87",
21276   "\t54\t-103\t-89",
21277   "\t53\t-102\t-90",
21278   "\t52\t-102\t-92",
21279   "\t51\t-102\t-93",
21280   "\t50\t-102\t-94",
21281   "\t49\t-102\t-94",
21282   "\t48\t-102\t-94",
21283   "\t47\t-102\t-94",
21284   "Canada: New Brunswick",
21285   "\t49\t-67\t-63",
21286   "\t48\t-70\t-63",
21287   "\t47\t-70\t-62",
21288   "\t46\t-70\t-62",
21289   "\t45\t-68\t-62",
21290   "\t44\t-68\t-63",
21291   "\t43\t-67\t-65",
21292   "Canada: Newfoundland and Labrador",
21293   "\t61\t-65\t-63",
21294   "\t60\t-65\t-62",
21295   "\t59\t-65\t-61",
21296   "\t58\t-65\t-60",
21297   "\t57\t-65\t-59",
21298   "\t56\t-68\t-57",
21299   "\t55\t-68\t-56",
21300   "\t54\t-68\t-54",
21301   "\t53\t-68\t-54",
21302   "\t52\t-68\t-54",
21303   "\t51\t-68\t-54",
21304   "\t50\t-66\t-63\t-59\t-52",
21305   "\t49\t-60\t-51",
21306   "\t48\t-60\t-51",
21307   "\t47\t-60\t-51",
21308   "\t46\t-60\t-51",
21309   "\t45\t-56\t-51",
21310   "Canada: Northwest Territories",
21311   "\t79\t-115\t-109",
21312   "\t78\t-121\t-109",
21313   "\t77\t-124\t-109",
21314   "\t76\t-124\t-109",
21315   "\t75\t-125\t-109",
21316   "\t74\t-125\t-109",
21317   "\t73\t-126\t-109",
21318   "\t72\t-126\t-109",
21319   "\t71\t-132\t-109",
21320   "\t70\t-137\t-109",
21321   "\t69\t-137\t-109",
21322   "\t68\t-137\t-115\t-113\t-111",
21323   "\t67\t-137\t-113",
21324   "\t66\t-137\t-108",
21325   "\t65\t-135\t-100",
21326   "\t64\t-134\t-100",
21327   "\t63\t-133\t-100",
21328   "\t62\t-131\t-100",
21329   "\t61\t-130\t-101",
21330   "\t60\t-129\t-101",
21331   "\t59\t-127\t-101",
21332   "Canada: Nova Scotia",
21333   "\t48\t-61\t-59",
21334   "\t47\t-65\t-58",
21335   "\t46\t-66\t-58",
21336   "\t45\t-67\t-58",
21337   "\t44\t-67\t-58",
21338   "\t43\t-67\t-58",
21339   "\t42\t-67\t-63\t-61\t-58",
21340   "Canada: Nunavut",
21341   "\t84\t-78\t-67",
21342   "\t83\t-90\t-60",
21343   "\t82\t-96\t-60",
21344   "\t81\t-101\t-60",
21345   "\t80\t-106\t-61",
21346   "\t79\t-111\t-108\t-106\t-66",
21347   "\t78\t-111\t-69",
21348   "\t77\t-111\t-73",
21349   "\t76\t-111\t-74",
21350   "\t75\t-111\t-76",
21351   "\t74\t-111\t-75",
21352   "\t73\t-111\t-73",
21353   "\t72\t-111\t-69",
21354   "\t71\t-118\t-66",
21355   "\t70\t-121\t-65",
21356   "\t69\t-121\t-63",
21357   "\t68\t-121\t-61",
21358   "\t67\t-121\t-60",
21359   "\t66\t-121\t-60",
21360   "\t65\t-118\t-60",
21361   "\t64\t-114\t-61",
21362   "\t63\t-110\t-62",
21363   "\t62\t-103\t-63",
21364   "\t61\t-103\t-89\t-84\t-63",
21365   "\t60\t-103\t-91\t-81\t-76\t-69\t-63",
21366   "\t59\t-103\t-93\t-81\t-76\t-69\t-63",
21367   "\t58\t-81\t-75",
21368   "\t57\t-81\t-75",
21369   "\t56\t-81\t-75",
21370   "\t55\t-82\t-75",
21371   "\t54\t-83\t-76",
21372   "\t53\t-83\t-77",
21373   "\t52\t-83\t-77",
21374   "\t51\t-82\t-77",
21375   "\t50\t-80\t-78",
21376   "Canada: Ontario",
21377   "\t57\t-90\t-86",
21378   "\t56\t-92\t-81",
21379   "\t55\t-94\t-81",
21380   "\t54\t-95\t-81",
21381   "\t53\t-96\t-79",
21382   "\t52\t-96\t-78",
21383   "\t51\t-96\t-78",
21384   "\t50\t-96\t-78",
21385   "\t49\t-96\t-78",
21386   "\t48\t-96\t-78",
21387   "\t47\t-95\t-76",
21388   "\t46\t-90\t-73",
21389   "\t45\t-86\t-73",
21390   "\t44\t-84\t-73",
21391   "\t43\t-84\t-73",
21392   "\t42\t-84\t-75",
21393   "\t41\t-84\t-77",
21394   "\t40\t-84\t-80",
21395   "Canada: Prince Edward Island",
21396   "\t48\t-65\t-62",
21397   "\t47\t-65\t-60",
21398   "\t46\t-65\t-60",
21399   "\t45\t-65\t-60",
21400   "\t44\t-63\t-61",
21401   "Canada: Quebec",
21402   "\t63\t-79\t-71",
21403   "\t62\t-79\t-68",
21404   "\t61\t-79\t-68\t-66\t-63",
21405   "\t60\t-79\t-63",
21406   "\t59\t-79\t-62",
21407   "\t58\t-79\t-62",
21408   "\t57\t-79\t-62",
21409   "\t56\t-79\t-62",
21410   "\t55\t-80\t-62",
21411   "\t54\t-80\t-62",
21412   "\t53\t-80\t-56",
21413   "\t52\t-80\t-56",
21414   "\t51\t-80\t-56",
21415   "\t50\t-80\t-56",
21416   "\t49\t-80\t-57",
21417   "\t48\t-80\t-60",
21418   "\t47\t-80\t-60",
21419   "\t46\t-80\t-65\t-63\t-60",
21420   "\t45\t-80\t-68",
21421   "\t44\t-78\t-69",
21422   "\t43\t-75\t-73",
21423   "Canada: Saskatchewan",
21424   "\t61\t-111\t-101",
21425   "\t60\t-111\t-101",
21426   "\t59\t-111\t-101",
21427   "\t58\t-111\t-101",
21428   "\t57\t-111\t-101",
21429   "\t56\t-111\t-100",
21430   "\t55\t-111\t-100",
21431   "\t54\t-111\t-100",
21432   "\t53\t-111\t-100",
21433   "\t52\t-111\t-100",
21434   "\t51\t-111\t-100",
21435   "\t50\t-111\t-100",
21436   "\t49\t-111\t-100",
21437   "\t48\t-111\t-100",
21438   "\t47\t-111\t-100",
21439   "Canada: Yukon",
21440   "\t70\t-142\t-136",
21441   "\t69\t-142\t-135",
21442   "\t68\t-142\t-132",
21443   "\t67\t-142\t-131",
21444   "\t66\t-142\t-131",
21445   "\t65\t-142\t-129",
21446   "\t64\t-142\t-128",
21447   "\t63\t-142\t-127",
21448   "\t62\t-142\t-125",
21449   "\t61\t-142\t-122",
21450   "\t60\t-142\t-122",
21451   "\t59\t-142\t-122",
21452   "Cape Verde",
21453   "\t18\t-26\t-23",
21454   "\t17\t-26\t-21",
21455   "\t16\t-26\t-21",
21456   "\t15\t-26\t-21",
21457   "\t14\t-25\t-21",
21458   "\t13\t-25\t-22",
21459   "Cayman Islands",
21460   "\t20\t-82\t-78",
21461   "\t19\t-82\t-78",
21462   "\t18\t-82\t-78",
21463   "Central African Republic",
21464   "\t12\t21\t23",
21465   "\t11\t20\t24",
21466   "\t10\t18\t24",
21467   "\t9\t17\t25",
21468   "\t8\t14\t26",
21469   "\t7\t13\t27",
21470   "\t6\t13\t28",
21471   "\t5\t13\t28",
21472   "\t4\t13\t28",
21473   "\t3\t13\t26",
21474   "\t2\t14\t19",
21475   "\t1\t14\t17",
21476   "Chad",
21477   "\t24\t13\t17",
21478   "\t23\t13\t19",
21479   "\t22\t13\t21",
21480   "\t21\t13\t23",
21481   "\t20\t14\t24",
21482   "\t19\t14\t24",
21483   "\t18\t14\t24",
21484   "\t17\t13\t24",
21485   "\t16\t12\t24",
21486   "\t15\t12\t24",
21487   "\t14\t12\t24",
21488   "\t13\t12\t23",
21489   "\t12\t12\t23",
21490   "\t11\t13\t23",
21491   "\t10\t12\t23",
21492   "\t9\t12\t23",
21493   "\t8\t12\t22",
21494   "\t7\t13\t20",
21495   "\t6\t14\t18",
21496   "Chile",
21497   "\t-16\t-70\t-68",
21498   "\t-17\t-71\t-67",
21499   "\t-18\t-71\t-67",
21500   "\t-19\t-71\t-67",
21501   "\t-20\t-71\t-67",
21502   "\t-21\t-71\t-66",
21503   "\t-22\t-71\t-66",
21504   "\t-23\t-71\t-66",
21505   "\t-24\t-71\t-66",
21506   "\t-25\t-106\t-104\t-81\t-78\t-71\t-66",
21507   "\t-26\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67",
21508   "\t-27\t-110\t-108\t-106\t-104\t-81\t-78\t-72\t-67",
21509   "\t-28\t-110\t-108\t-72\t-67",
21510   "\t-29\t-72\t-68",
21511   "\t-30\t-72\t-68",
21512   "\t-31\t-72\t-68",
21513   "\t-32\t-81\t-77\t-72\t-68",
21514   "\t-33\t-81\t-77\t-73\t-68",
21515   "\t-34\t-81\t-77\t-73\t-68",
21516   "\t-35\t-74\t-68",
21517   "\t-36\t-74\t-69",
21518   "\t-37\t-74\t-69",
21519   "\t-38\t-74\t-69",
21520   "\t-39\t-74\t-69",
21521   "\t-40\t-75\t-70",
21522   "\t-41\t-75\t-70",
21523   "\t-42\t-75\t-70",
21524   "\t-43\t-76\t-70",
21525   "\t-44\t-76\t-70",
21526   "\t-45\t-76\t-70",
21527   "\t-46\t-76\t-70",
21528   "\t-47\t-76\t-70",
21529   "\t-48\t-76\t-70",
21530   "\t-49\t-76\t-71",
21531   "\t-50\t-76\t-69",
21532   "\t-51\t-76\t-67",
21533   "\t-52\t-76\t-67",
21534   "\t-53\t-76\t-66",
21535   "\t-54\t-75\t-65",
21536   "\t-55\t-74\t-65",
21537   "\t-56\t-72\t-65",
21538   "China",
21539   "\t54\t119\t126",
21540   "\t53\t119\t127",
21541   "\t52\t118\t127",
21542   "\t51\t118\t128",
21543   "\t50\t85\t88\t115\t130",
21544   "\t49\t84\t90\t114\t135",
21545   "\t48\t81\t91\t114\t135",
21546   "\t47\t81\t92\t114\t135",
21547   "\t46\t79\t94\t110\t135",
21548   "\t45\t78\t96\t110\t135",
21549   "\t44\t78\t96\t109\t134",
21550   "\t43\t78\t132",
21551   "\t42\t75\t132",
21552   "\t41\t72\t132",
21553   "\t40\t72\t129",
21554   "\t39\t72\t127",
21555   "\t38\t72\t125",
21556   "\t37\t72\t123",
21557   "\t36\t73\t123",
21558   "\t35\t73\t123",
21559   "\t34\t74\t121",
21560   "\t33\t77\t122",
21561   "\t32\t77\t122",
21562   "\t31\t77\t123",
21563   "\t30\t77\t123",
21564   "\t29\t78\t123",
21565   "\t28\t81\t123",
21566   "\t27\t83\t122",
21567   "\t26\t84\t93\t96\t122",
21568   "\t25\t96\t121",
21569   "\t24\t96\t120",
21570   "\t23\t96\t120",
21571   "\t22\t96\t118",
21572   "\t21\t98\t117",
21573   "\t20\t98\t102\t105\t114",
21574   "\t19\t107\t112",
21575   "\t18\t107\t112",
21576   "\t17\t107\t113",
21577   "\t16\t110\t113",
21578   "\t15\t110\t113",
21579   "\t14\t110\t112",
21580   "China: Hainan",
21581   "\t21\t108\t111",
21582   "\t20\t107\t112",
21583   "\t19\t107\t112",
21584   "\t18\t107\t112",
21585   "\t17\t107\t111",
21586   "Christmas Island",
21587   "\t-9\t104\t106",
21588   "\t-10\t104\t106",
21589   "\t-11\t104\t106",
21590   "Clipperton Island",
21591   "\t11\t-110\t-108",
21592   "\t10\t-110\t-108",
21593   "\t9\t-110\t-108",
21594   "Cocos Islands",
21595   "\t-11\t95\t97",
21596   "\t-12\t95\t97",
21597   "\t-13\t95\t97",
21598   "Colombia",
21599   "\t14\t-82\t-79",
21600   "\t13\t-82\t-79\t-73\t-70",
21601   "\t12\t-82\t-79\t-75\t-70",
21602   "\t11\t-82\t-80\t-76\t-70",
21603   "\t10\t-77\t-70",
21604   "\t9\t-78\t-71",
21605   "\t8\t-78\t-69",
21606   "\t7\t-78\t-66",
21607   "\t6\t-78\t-66",
21608   "\t5\t-78\t-66",
21609   "\t4\t-82\t-66",
21610   "\t3\t-82\t-66",
21611   "\t2\t-82\t-65",
21612   "\t1\t-80\t-65",
21613   "\t0\t-80\t-65",
21614   "\t-1\t-79\t-68",
21615   "\t-2\t-75\t-68",
21616   "\t-3\t-74\t-68",
21617   "\t-4\t-71\t-68",
21618   "\t-5\t-71\t-68",
21619   "Comoros",
21620   "\t-10\t42\t44",
21621   "\t-11\t42\t45",
21622   "\t-12\t42\t45",
21623   "\t-13\t42\t45",
21624   "Cook Islands",
21625   "\t-7\t-159\t-156",
21626   "\t-8\t-159\t-156",
21627   "\t-9\t-166\t-164\t-162\t-156",
21628   "\t-10\t-166\t-164\t-162\t-159",
21629   "\t-11\t-166\t-164\t-162\t-159",
21630   "\t-17\t-160\t-158",
21631   "\t-18\t-160\t-156",
21632   "\t-19\t-160\t-156",
21633   "\t-20\t-160\t-156",
21634   "\t-21\t-160\t-156",
21635   "\t-22\t-160\t-156",
21636   "Coral Sea Islands",
21637   "\t-15\t146\t151",
21638   "\t-16\t146\t151",
21639   "\t-17\t146\t151",
21640   "\t-18\t147\t149",
21641   "\t-20\t151\t156",
21642   "\t-21\t151\t156",
21643   "\t-22\t151\t156",
21644   "\t-23\t151\t156",
21645   "Costa Rica",
21646   "\t12\t-86\t-83",
21647   "\t11\t-86\t-82",
21648   "\t10\t-86\t-81",
21649   "\t9\t-86\t-81",
21650   "\t8\t-86\t-81",
21651   "\t7\t-84\t-81",
21652   "\t6\t-88\t-86",
21653   "\t5\t-88\t-86",
21654   "\t4\t-88\t-86",
21655   "Cote d'Ivoire",
21656   "\t11\t-9\t-3",
21657   "\t10\t-9\t-1",
21658   "\t9\t-9\t-1",
21659   "\t8\t-9\t-1",
21660   "\t7\t-9\t-1",
21661   "\t6\t-9\t-1",
21662   "\t5\t-9\t-1",
21663   "\t4\t-8\t-1",
21664   "\t3\t-8\t-4",
21665   "Croatia",
21666   "\t47\t14\t18",
21667   "\t46\t12\t20",
21668   "\t45\t12\t20",
21669   "\t44\t12\t20",
21670   "\t43\t12\t20",
21671   "\t42\t14\t19",
21672   "\t41\t15\t19",
21673   "Cuba",
21674   "\t24\t-84\t-79",
21675   "\t23\t-85\t-76",
21676   "\t22\t-85\t-74",
21677   "\t21\t-85\t-73",
21678   "\t20\t-85\t-73",
21679   "\t19\t-80\t-73",
21680   "\t18\t-78\t-73",
21681   "Curacao",
21682   "\t13\t-70\t-67",
21683   "\t12\t-70\t-67",
21684   "\t11\t-70\t-67",
21685   "Cyprus",
21686   "\t36\t31\t35",
21687   "\t35\t31\t35",
21688   "\t34\t31\t35",
21689   "\t33\t31\t35",
21690   "Cyprus",
21691   "\t36\t31\t35",
21692   "\t35\t31\t35",
21693   "\t34\t31\t35",
21694   "Cyprus",
21695   "\t35\t31\t34",
21696   "\t34\t31\t34",
21697   "\t33\t31\t34",
21698   "Cyprus",
21699   "\t36\t32\t34",
21700   "\t35\t32\t34",
21701   "\t34\t32\t34",
21702   "\t33\t32\t34",
21703   "Czech Republic",
21704   "\t52\t13\t16",
21705   "\t51\t11\t19",
21706   "\t50\t11\t19",
21707   "\t49\t11\t19",
21708   "\t48\t11\t19",
21709   "\t47\t12\t18",
21710   "Democratic Republic of the Congo",
21711   "\t6\t18\t20\t23\t28",
21712   "\t5\t17\t31",
21713   "\t4\t17\t31",
21714   "\t3\t17\t32",
21715   "\t2\t16\t32",
21716   "\t1\t16\t32",
21717   "\t0\t15\t32",
21718   "\t-1\t15\t31",
21719   "\t-2\t14\t30",
21720   "\t-3\t11\t30",
21721   "\t-4\t11\t30",
21722   "\t-5\t11\t31",
21723   "\t-6\t11\t31",
21724   "\t-7\t11\t13\t15\t31",
21725   "\t-8\t15\t31",
21726   "\t-9\t16\t31",
21727   "\t-10\t20\t29",
21728   "\t-11\t21\t30",
21729   "\t-12\t21\t30",
21730   "\t-13\t25\t30",
21731   "\t-14\t27\t30",
21732   "Denmark",
21733   "\t58\t7\t12",
21734   "\t57\t7\t13",
21735   "\t56\t7\t16",
21736   "\t55\t7\t16",
21737   "\t54\t7\t16",
21738   "\t53\t7\t16",
21739   "Djibouti",
21740   "\t13\t41\t44",
21741   "\t12\t40\t44",
21742   "\t11\t40\t44",
21743   "\t10\t40\t44",
21744   "\t9\t40\t43",
21745   "Dominica",
21746   "\t16\t-62\t-60",
21747   "\t15\t-62\t-60",
21748   "\t14\t-62\t-60",
21749   "Dominican Republic",
21750   "\t20\t-72\t-67",
21751   "\t19\t-73\t-67",
21752   "\t18\t-73\t-67",
21753   "\t17\t-73\t-67",
21754   "\t16\t-72\t-70",
21755   "Ecuador",
21756   "\t2\t-80\t-77",
21757   "\t1\t-81\t-74",
21758   "\t0\t-81\t-74",
21759   "\t-1\t-82\t-74",
21760   "\t-2\t-82\t-74",
21761   "\t-3\t-82\t-74",
21762   "\t-4\t-81\t-76",
21763   "\t-5\t-81\t-77",
21764   "\t-6\t-80\t-78",
21765   "Ecuador: Galapagos",
21766   "\t2\t-93\t-90",
21767   "\t1\t-93\t-88",
21768   "\t0\t-93\t-88",
21769   "\t-1\t-92\t-88",
21770   "\t-2\t-92\t-88",
21771   "Egypt",
21772   "\t32\t23\t35",
21773   "\t31\t23\t35",
21774   "\t30\t23\t35",
21775   "\t29\t23\t35",
21776   "\t28\t23\t35",
21777   "\t27\t23\t35",
21778   "\t26\t23\t35",
21779   "\t25\t23\t36",
21780   "\t24\t23\t36",
21781   "\t23\t23\t37",
21782   "\t22\t23\t37",
21783   "\t21\t23\t37",
21784   "\t20\t23\t37",
21785   "El Salvador",
21786   "\t15\t-90\t-87",
21787   "\t14\t-91\t-86",
21788   "\t13\t-91\t-86",
21789   "\t12\t-91\t-86",
21790   "Equatorial Guinea",
21791   "\t4\t7\t9",
21792   "\t3\t7\t12",
21793   "\t2\t7\t12",
21794   "\t1\t8\t12",
21795   "\t0\t4\t6\t8\t12",
21796   "\t-1\t4\t6\t8\t10",
21797   "\t-2\t4\t6",
21798   "Eritrea",
21799   "\t19\t37\t39",
21800   "\t18\t35\t40",
21801   "\t17\t35\t41",
21802   "\t16\t35\t41",
21803   "\t15\t35\t42",
21804   "\t14\t35\t43",
21805   "\t13\t35\t44",
21806   "\t12\t39\t44",
21807   "\t11\t40\t44",
21808   "Estonia",
21809   "\t60\t21\t29",
21810   "\t59\t20\t29",
21811   "\t58\t20\t29",
21812   "\t57\t20\t28",
21813   "\t56\t20\t28",
21814   "Ethiopia",
21815   "\t15\t35\t41",
21816   "\t14\t35\t42",
21817   "\t13\t34\t43",
21818   "\t12\t33\t43",
21819   "\t11\t33\t44",
21820   "\t10\t33\t44",
21821   "\t9\t32\t47",
21822   "\t8\t31\t48",
21823   "\t7\t31\t48",
21824   "\t6\t31\t48",
21825   "\t5\t33\t47",
21826   "\t4\t33\t46",
21827   "\t3\t34\t46",
21828   "\t2\t36\t42",
21829   "Europa Island",
21830   "\t-21\t39\t41",
21831   "\t-22\t39\t41",
21832   "\t-23\t39\t41",
21833   "Falkland Islands (Islas Malvinas)",
21834   "\t-50\t-62\t-56",
21835   "\t-51\t-62\t-56",
21836   "\t-52\t-62\t-56",
21837   "\t-53\t-62\t-57",
21838   "Faroe Islands",
21839   "\t63\t-8\t-5",
21840   "\t62\t-8\t-5",
21841   "\t61\t-8\t-5",
21842   "\t60\t-7\t-5",
21843   "Fiji",
21844   "\t-11\t176\t178",
21845   "\t-12\t176\t178",
21846   "\t-13\t176\t178",
21847   "\t-15\t-180\t-178\t176\t180",
21848   "\t-16\t-180\t-177\t176\t180",
21849   "\t-17\t-180\t-177\t176\t180",
21850   "\t-18\t-180\t-177\t176\t180",
21851   "\t-19\t-180\t-177\t176\t180",
21852   "\t-20\t-180\t-177\t173\t180",
21853   "\t-21\t-179\t-177\t173\t175",
21854   "\t-22\t173\t175",
21855   "Finland",
21856   "\t71\t26\t28",
21857   "\t70\t19\t30",
21858   "\t69\t19\t30",
21859   "\t68\t19\t31",
21860   "\t67\t19\t31",
21861   "\t66\t22\t31",
21862   "\t65\t22\t31",
21863   "\t64\t20\t32",
21864   "\t63\t20\t32",
21865   "\t62\t20\t32",
21866   "\t61\t20\t32",
21867   "\t60\t20\t31",
21868   "\t59\t20\t29",
21869   "\t58\t21\t25",
21870   "Finland",
21871   "\t61\t18\t22",
21872   "\t60\t18\t22",
21873   "\t59\t18\t22",
21874   "\t58\t19\t21",
21875   "France",
21876   "\t52\t0\t3",
21877   "\t51\t0\t5",
21878   "\t50\t-2\t8",
21879   "\t49\t-6\t9",
21880   "\t48\t-6\t9",
21881   "\t47\t-6\t9",
21882   "\t46\t-5\t8",
21883   "\t45\t-3\t8",
21884   "\t44\t-2\t8",
21885   "\t43\t-2\t8",
21886   "\t42\t-2\t8",
21887   "\t41\t-2\t7",
21888   "France: Corsica",
21889   "\t44\t8\t10",
21890   "\t43\t7\t10",
21891   "\t42\t7\t10",
21892   "\t41\t7\t10",
21893   "\t40\t7\t10",
21894   "France: Saint Barthelemy",
21895   "\t18\t-63\t-61",
21896   "\t17\t-63\t-61",
21897   "\t16\t-63\t-61",
21898   "France: Saint Martin",
21899   "\t19\t-64\t-62",
21900   "\t18\t-64\t-62",
21901   "\t17\t-64\t-62",
21902   "French Guiana",
21903   "\t6\t-55\t-51",
21904   "\t5\t-55\t-50",
21905   "\t4\t-55\t-50",
21906   "\t3\t-55\t-50",
21907   "\t2\t-55\t-50",
21908   "\t1\t-55\t-51",
21909   "French Polynesia",
21910   "\t-6\t-141\t-139",
21911   "\t-7\t-141\t-138",
21912   "\t-8\t-141\t-137",
21913   "\t-9\t-141\t-137",
21914   "\t-10\t-141\t-137",
21915   "\t-11\t-140\t-137",
21916   "\t-13\t-149\t-140",
21917   "\t-14\t-155\t-153\t-149\t-139",
21918   "\t-15\t-155\t-139",
21919   "\t-16\t-155\t-137",
21920   "\t-17\t-154\t-135",
21921   "\t-18\t-151\t-148\t-146\t-135",
21922   "\t-19\t-142\t-135",
21923   "\t-20\t-142\t-134",
21924   "\t-21\t-152\t-150\t-141\t-134",
21925   "\t-22\t-152\t-146\t-141\t-133",
21926   "\t-23\t-152\t-146\t-137\t-133",
21927   "\t-24\t-150\t-146\t-136\t-133",
21928   "\t-26\t-145\t-143",
21929   "\t-27\t-145\t-143",
21930   "\t-28\t-145\t-143",
21931   "French Southern and Antarctic Lands",
21932   "\t-10\t46\t48",
21933   "\t-11\t46\t48",
21934   "\t-12\t46\t48",
21935   "\t-14\t53\t55",
21936   "\t-15\t53\t55",
21937   "\t-16\t41\t43\t53\t55",
21938   "\t-17\t41\t43",
21939   "\t-18\t41\t43",
21940   "\t-20\t38\t40",
21941   "\t-21\t38\t41",
21942   "\t-22\t38\t41",
21943   "\t-23\t39\t41",
21944   "\t-36\t76\t78",
21945   "\t-37\t76\t78",
21946   "\t-38\t76\t78",
21947   "\t-39\t76\t78",
21948   "\t-45\t49\t52",
21949   "\t-46\t49\t52",
21950   "\t-47\t49\t52\t67\t70",
21951   "\t-48\t67\t71",
21952   "\t-49\t67\t71",
21953   "\t-50\t67\t71",
21954   "Gabon",
21955   "\t3\t10\t14",
21956   "\t2\t8\t15",
21957   "\t1\t7\t15",
21958   "\t0\t7\t15",
21959   "\t-1\t7\t15",
21960   "\t-2\t7\t15",
21961   "\t-3\t8\t15",
21962   "\t-4\t9\t12",
21963   "Gambia",
21964   "\t14\t-17\t-12",
21965   "\t13\t-17\t-12",
21966   "\t12\t-17\t-12",
21967   "Gaza Strip",
21968   "\t32\t33\t35",
21969   "\t31\t33\t35",
21970   "\t30\t33\t35",
21971   "Georgia",
21972   "\t44\t38\t44",
21973   "\t43\t38\t47",
21974   "\t42\t38\t47",
21975   "\t41\t39\t47",
21976   "\t40\t40\t47",
21977   "Germany",
21978   "\t56\t7\t9",
21979   "\t55\t6\t15",
21980   "\t54\t5\t15",
21981   "\t53\t5\t15",
21982   "\t52\t4\t16",
21983   "\t51\t4\t16",
21984   "\t50\t4\t16",
21985   "\t49\t4\t15",
21986   "\t48\t5\t14",
21987   "\t47\t6\t14",
21988   "\t46\t6\t14",
21989   "Ghana",
21990   "\t12\t-2\t1",
21991   "\t11\t-3\t1",
21992   "\t10\t-3\t1",
21993   "\t9\t-3\t1",
21994   "\t8\t-4\t1",
21995   "\t7\t-4\t2",
21996   "\t6\t-4\t2",
21997   "\t5\t-4\t2",
21998   "\t4\t-4\t2",
21999   "\t3\t-3\t0",
22000   "Gibraltar",
22001   "\t37\t-6\t-4",
22002   "\t36\t-6\t-4",
22003   "\t35\t-6\t-4",
22004   "Glorioso Islands",
22005   "\t-10\t46\t48",
22006   "\t-11\t46\t48",
22007   "\t-12\t46\t48",
22008   "Greece",
22009   "\t42\t20\t27",
22010   "\t41\t19\t27",
22011   "\t40\t18\t27",
22012   "\t39\t18\t27",
22013   "\t38\t18\t28",
22014   "\t37\t19\t29",
22015   "\t36\t19\t29",
22016   "\t35\t20\t29",
22017   "\t34\t22\t28",
22018   "\t33\t23\t26",
22019   "Greenland",
22020   "\t84\t-47\t-23",
22021   "\t83\t-60\t-18",
22022   "\t82\t-65\t-10",
22023   "\t81\t-68\t-10",
22024   "\t80\t-69\t-10",
22025   "\t79\t-74\t-13",
22026   "\t78\t-74\t-16",
22027   "\t77\t-74\t-16",
22028   "\t76\t-73\t-16",
22029   "\t75\t-72\t-16",
22030   "\t74\t-68\t-65\t-61\t-16",
22031   "\t73\t-58\t-16",
22032   "\t72\t-57\t-19",
22033   "\t71\t-57\t-20",
22034   "\t70\t-56\t-20",
22035   "\t69\t-56\t-20",
22036   "\t68\t-55\t-21",
22037   "\t67\t-54\t-24",
22038   "\t66\t-54\t-31",
22039   "\t65\t-54\t-32",
22040   "\t64\t-54\t-34",
22041   "\t63\t-53\t-39",
22042   "\t62\t-52\t-39",
22043   "\t61\t-51\t-40",
22044   "\t60\t-50\t-41",
22045   "\t59\t-49\t-41",
22046   "\t58\t-45\t-42",
22047   "Grenada",
22048   "\t13\t-62\t-60",
22049   "\t12\t-62\t-60",
22050   "\t11\t-62\t-60",
22051   "Guadeloupe",
22052   "\t17\t-62\t-59",
22053   "\t16\t-62\t-59",
22054   "\t15\t-62\t-59",
22055   "\t14\t-62\t-60",
22056   "Guam",
22057   "\t14\t143\t145",
22058   "\t13\t143\t145",
22059   "\t12\t143\t145",
22060   "Guatemala",
22061   "\t18\t-92\t-88",
22062   "\t17\t-92\t-88",
22063   "\t16\t-93\t-87",
22064   "\t15\t-93\t-87",
22065   "\t14\t-93\t-87",
22066   "\t13\t-93\t-88",
22067   "\t12\t-92\t-89",
22068   "Guernsey",
22069   "\t50\t-3\t-1",
22070   "\t49\t-3\t-1",
22071   "\t48\t-3\t-1",
22072   "Guinea",
22073   "\t13\t-14\t-7",
22074   "\t12\t-15\t-7",
22075   "\t11\t-16\t-6",
22076   "\t10\t-16\t-6",
22077   "\t9\t-16\t-6",
22078   "\t8\t-14\t-6",
22079   "\t7\t-11\t-6",
22080   "\t6\t-10\t-7",
22081   "Guinea-Bissau",
22082   "\t13\t-17\t-12",
22083   "\t12\t-17\t-12",
22084   "\t11\t-17\t-12",
22085   "\t10\t-17\t-12",
22086   "\t9\t-16\t-13",
22087   "Guyana",
22088   "\t9\t-61\t-58",
22089   "\t8\t-61\t-57",
22090   "\t7\t-62\t-56",
22091   "\t6\t-62\t-56",
22092   "\t5\t-62\t-56",
22093   "\t4\t-62\t-56",
22094   "\t3\t-61\t-55",
22095   "\t2\t-61\t-55",
22096   "\t1\t-61\t-55",
22097   "\t0\t-60\t-55",
22098   "Haiti",
22099   "\t21\t-73\t-71",
22100   "\t20\t-74\t-70",
22101   "\t19\t-75\t-70",
22102   "\t18\t-75\t-70",
22103   "\t17\t-75\t-70",
22104   "Heard Island and McDonald Islands",
22105   "\t-51\t72\t74",
22106   "\t-52\t72\t74",
22107   "\t-53\t72\t74",
22108   "\t-54\t72\t74",
22109   "Honduras",
22110   "\t18\t-84\t-82",
22111   "\t17\t-87\t-82",
22112   "\t16\t-90\t-82",
22113   "\t15\t-90\t-82",
22114   "\t14\t-90\t-82",
22115   "\t13\t-90\t-82",
22116   "\t12\t-89\t-84",
22117   "\t11\t-88\t-86",
22118   "Hong Kong",
22119   "\t23\t112\t115",
22120   "\t22\t112\t115",
22121   "\t21\t112\t115",
22122   "Howland Island",
22123   "\t1\t-177\t-175",
22124   "\t0\t-177\t-175",
22125   "\t-1\t-177\t-175",
22126   "Hungary",
22127   "\t49\t16\t23",
22128   "\t48\t15\t23",
22129   "\t47\t15\t23",
22130   "\t46\t15\t23",
22131   "\t45\t15\t22",
22132   "\t44\t16\t20",
22133   "Iceland",
22134   "\t67\t-24\t-13",
22135   "\t66\t-25\t-12",
22136   "\t65\t-25\t-12",
22137   "\t64\t-25\t-12",
22138   "\t63\t-25\t-12",
22139   "\t62\t-23\t-15",
22140   "India",
22141   "\t36\t76\t79",
22142   "\t35\t72\t79",
22143   "\t34\t72\t80",
22144   "\t33\t72\t80",
22145   "\t32\t72\t80",
22146   "\t31\t72\t82",
22147   "\t30\t71\t82\t93\t97",
22148   "\t29\t69\t82\t87\t89\t91\t98",
22149   "\t28\t68\t98",
22150   "\t27\t68\t98",
22151   "\t26\t68\t98",
22152   "\t25\t67\t96",
22153   "\t24\t67\t96",
22154   "\t23\t67\t95",
22155   "\t22\t67\t95",
22156   "\t21\t67\t94",
22157   "\t20\t68\t93",
22158   "\t19\t69\t88",
22159   "\t18\t71\t87",
22160   "\t17\t71\t85",
22161   "\t16\t72\t84",
22162   "\t15\t72\t83",
22163   "\t14\t72\t82\t91\t95",
22164   "\t13\t73\t81\t91\t95",
22165   "\t12\t71\t81\t91\t95",
22166   "\t11\t71\t81\t91\t94",
22167   "\t10\t71\t80\t91\t94",
22168   "\t9\t71\t80\t91\t94",
22169   "\t8\t72\t80\t91\t94",
22170   "\t7\t72\t79\t92\t94",
22171   "\t6\t92\t94",
22172   "\t5\t92\t94",
22173   "Indonesia",
22174   "\t6\t94\t98\t125\t127",
22175   "\t5\t94\t99\t106\t109\t114\t118\t125\t128",
22176   "\t4\t94\t100\t104\t109\t114\t118\t124\t128",
22177   "\t3\t94\t102\t104\t110\t113\t119\t124\t129",
22178   "\t2\t94\t132",
22179   "\t1\t94\t137",
22180   "\t0\t96\t139",
22181   "\t-1\t96\t141",
22182   "\t-2\t97\t141",
22183   "\t-3\t98\t141",
22184   "\t-4\t99\t141",
22185   "\t-5\t101\t141",
22186   "\t-6\t101\t116\t118\t141",
22187   "\t-7\t104\t141",
22188   "\t-8\t105\t132\t136\t141",
22189   "\t-9\t109\t132\t136\t141",
22190   "\t-10\t115\t126\t139\t141",
22191   "\t-11\t119\t125",
22192   "Iran",
22193   "\t40\t43\t49",
22194   "\t39\t43\t49\t54\t58",
22195   "\t38\t43\t61",
22196   "\t37\t43\t62",
22197   "\t36\t43\t62",
22198   "\t35\t43\t62",
22199   "\t34\t44\t62",
22200   "\t33\t44\t62",
22201   "\t32\t44\t62",
22202   "\t31\t45\t62",
22203   "\t30\t46\t62",
22204   "\t29\t46\t63",
22205   "\t28\t47\t64",
22206   "\t27\t49\t64",
22207   "\t26\t50\t64",
22208   "\t25\t52\t64",
22209   "\t24\t53\t62",
22210   "Iraq",
22211   "\t38\t41\t45",
22212   "\t37\t40\t46",
22213   "\t36\t40\t47",
22214   "\t35\t39\t47",
22215   "\t34\t37\t47",
22216   "\t33\t37\t48",
22217   "\t32\t37\t48",
22218   "\t31\t37\t49",
22219   "\t30\t39\t49",
22220   "\t29\t41\t49",
22221   "\t28\t42\t49",
22222   "Ireland",
22223   "\t56\t-9\t-5",
22224   "\t55\t-11\t-5",
22225   "\t54\t-11\t-5",
22226   "\t53\t-11\t-4",
22227   "\t52\t-11\t-4",
22228   "\t51\t-11\t-4",
22229   "\t50\t-11\t-6",
22230   "Isle of Man",
22231   "\t55\t-5\t-3",
22232   "\t54\t-5\t-3",
22233   "\t53\t-5\t-3",
22234   "Israel",
22235   "\t34\t34\t36",
22236   "\t33\t33\t36",
22237   "\t32\t33\t36",
22238   "\t31\t33\t36",
22239   "\t30\t33\t36",
22240   "\t29\t33\t36",
22241   "\t28\t33\t36",
22242   "Italy",
22243   "\t48\t10\t13",
22244   "\t47\t6\t14",
22245   "\t46\t5\t14",
22246   "\t45\t5\t14",
22247   "\t44\t5\t14",
22248   "\t43\t5\t16",
22249   "\t42\t6\t18",
22250   "\t41\t7\t19",
22251   "\t40\t7\t19",
22252   "\t39\t7\t19",
22253   "\t38\t7\t19",
22254   "\t37\t7\t18",
22255   "\t36\t10\t17",
22256   "\t35\t10\t16",
22257   "\t34\t11\t13",
22258   "Jamaica",
22259   "\t19\t-79\t-75",
22260   "\t18\t-79\t-75",
22261   "\t17\t-79\t-75",
22262   "\t16\t-78\t-75",
22263   "Jan Mayen",
22264   "\t72\t-9\t-6",
22265   "\t71\t-10\t-6",
22266   "\t70\t-10\t-6",
22267   "\t69\t-10\t-7",
22268   "Japan",
22269   "\t46\t139\t143",
22270   "\t45\t139\t146",
22271   "\t44\t139\t146",
22272   "\t43\t138\t146",
22273   "\t42\t138\t146",
22274   "\t41\t138\t146",
22275   "\t40\t138\t144",
22276   "\t39\t137\t143",
22277   "\t38\t135\t143",
22278   "\t37\t131\t142",
22279   "\t36\t131\t142",
22280   "\t35\t128\t141",
22281   "\t34\t128\t141",
22282   "\t33\t127\t140",
22283   "\t32\t127\t141",
22284   "\t31\t127\t134\t138\t141",
22285   "\t30\t128\t132\t139\t141",
22286   "\t29\t128\t132\t139\t141",
22287   "\t28\t126\t131\t139\t143",
22288   "\t27\t125\t131\t139\t143",
22289   "\t26\t122\t132\t139\t143",
22290   "\t25\t121\t132\t140\t143\t152\t154",
22291   "\t24\t121\t126\t130\t132\t140\t142\t152\t154",
22292   "\t23\t121\t126\t140\t142\t152\t154",
22293   "Jarvis Island",
22294   "\t1\t-161\t-159",
22295   "\t0\t-161\t-159",
22296   "\t-1\t-161\t-159",
22297   "Jersey",
22298   "\t50\t-3\t-1",
22299   "\t49\t-3\t-1",
22300   "\t48\t-3\t-1",
22301   "Johnston Atoll",
22302   "\t17\t-170\t-168",
22303   "\t16\t-170\t-168",
22304   "\t15\t-170\t-168",
22305   "Jordan",
22306   "\t34\t37\t39",
22307   "\t33\t34\t40",
22308   "\t32\t34\t40",
22309   "\t31\t34\t40",
22310   "\t30\t33\t39",
22311   "\t29\t33\t38",
22312   "\t28\t33\t38",
22313   "Juan de Nova Island",
22314   "\t-16\t41\t43",
22315   "\t-17\t41\t43",
22316   "\t-18\t41\t43",
22317   "Kazakhstan",
22318   "\t56\t67\t71",
22319   "\t55\t60\t77",
22320   "\t54\t59\t79",
22321   "\t53\t59\t79",
22322   "\t52\t48\t84",
22323   "\t51\t46\t86",
22324   "\t50\t45\t88",
22325   "\t49\t45\t88",
22326   "\t48\t45\t88",
22327   "\t47\t45\t87",
22328   "\t46\t46\t86",
22329   "\t45\t47\t86",
22330   "\t44\t48\t83",
22331   "\t43\t48\t56\t58\t81",
22332   "\t42\t49\t56\t60\t81",
22333   "\t41\t50\t56\t64\t81",
22334   "\t40\t51\t56\t65\t71",
22335   "\t39\t66\t69",
22336   "Kenya",
22337   "\t6\t34\t36",
22338   "\t5\t32\t42",
22339   "\t4\t32\t42",
22340   "\t3\t32\t42",
22341   "\t2\t33\t42",
22342   "\t1\t32\t42",
22343   "\t0\t32\t42",
22344   "\t-1\t32\t42",
22345   "\t-2\t32\t42",
22346   "\t-3\t34\t42",
22347   "\t-4\t36\t41",
22348   "\t-5\t37\t40",
22349   "Kerguelen Archipelago",
22350   "\t-47\t67\t70",
22351   "\t-48\t67\t71",
22352   "\t-49\t67\t71",
22353   "\t-50\t67\t71",
22354   "Kingman Reef",
22355   "\t7\t-163\t-161",
22356   "\t6\t-163\t-161",
22357   "\t5\t-163\t-161",
22358   "Kiribati",
22359   "\t5\t-161\t-159",
22360   "\t4\t-161\t-158\t171\t173",
22361   "\t3\t-161\t-156\t171\t174",
22362   "\t2\t-160\t-156\t171\t174",
22363   "\t1\t-158\t-156\t168\t175",
22364   "\t0\t-158\t-156\t168\t177",
22365   "\t-1\t-172\t-170\t168\t177",
22366   "\t-2\t-172\t-170\t173\t177",
22367   "\t-3\t-175\t-170\t-156\t-153\t174\t177",
22368   "\t-4\t-175\t-170\t-156\t-153",
22369   "\t-5\t-175\t-170\t-156\t-153",
22370   "\t-6\t-156\t-154",
22371   "\t-10\t-152\t-150",
22372   "\t-11\t-152\t-150",
22373   "\t-12\t-152\t-150",
22374   "Kosovo",
22375   "\t44\t19\t22",
22376   "\t43\t19\t22",
22377   "\t42\t19\t22",
22378   "\t41\t19\t22",
22379   "\t40\t19\t21",
22380   "Kuwait",
22381   "\t31\t46\t49",
22382   "\t30\t45\t49",
22383   "\t29\t45\t49",
22384   "\t28\t45\t49",
22385   "\t27\t46\t49",
22386   "Kyrgyzstan",
22387   "\t44\t72\t75",
22388   "\t43\t69\t81",
22389   "\t42\t69\t81",
22390   "\t41\t68\t81",
22391   "\t40\t68\t80",
22392   "\t39\t68\t78",
22393   "\t38\t68\t74",
22394   "Laos",
22395   "\t23\t100\t103",
22396   "\t22\t99\t104",
22397   "\t21\t99\t105",
22398   "\t20\t99\t105",
22399   "\t19\t99\t106",
22400   "\t18\t99\t107",
22401   "\t17\t99\t108",
22402   "\t16\t99\t108",
22403   "\t15\t103\t108",
22404   "\t14\t104\t108",
22405   "\t13\t104\t108",
22406   "\t12\t104\t107",
22407   "Latvia",
22408   "\t59\t23\t26",
22409   "\t58\t20\t28",
22410   "\t57\t19\t29",
22411   "\t56\t19\t29",
22412   "\t55\t19\t29",
22413   "\t54\t24\t28",
22414   "Lebanon",
22415   "\t35\t34\t37",
22416   "\t34\t34\t37",
22417   "\t33\t34\t37",
22418   "\t32\t34\t37",
22419   "Lesotho",
22420   "\t-27\t26\t30",
22421   "\t-28\t26\t30",
22422   "\t-29\t26\t30",
22423   "\t-30\t26\t30",
22424   "\t-31\t26\t29",
22425   "Liberia",
22426   "\t9\t-11\t-8",
22427   "\t8\t-12\t-7",
22428   "\t7\t-12\t-6",
22429   "\t6\t-12\t-6",
22430   "\t5\t-12\t-6",
22431   "\t4\t-11\t-6",
22432   "\t3\t-10\t-6",
22433   "Libya",
22434   "\t34\t10\t12",
22435   "\t33\t9\t16\t19\t25",
22436   "\t32\t9\t26",
22437   "\t31\t8\t26",
22438   "\t30\t8\t26",
22439   "\t29\t8\t25",
22440   "\t28\t8\t25",
22441   "\t27\t8\t25",
22442   "\t26\t8\t25",
22443   "\t25\t8\t25",
22444   "\t24\t8\t25",
22445   "\t23\t9\t25",
22446   "\t22\t10\t25",
22447   "\t21\t12\t25",
22448   "\t20\t17\t25",
22449   "\t19\t20\t25",
22450   "\t18\t21\t25",
22451   "Liechtenstein",
22452   "\t48\t8\t10",
22453   "\t47\t8\t10",
22454   "\t46\t8\t10",
22455   "Lithuania",
22456   "\t57\t20\t26",
22457   "\t56\t19\t27",
22458   "\t55\t19\t27",
22459   "\t54\t19\t27",
22460   "\t53\t21\t27",
22461   "\t52\t22\t25",
22462   "Luxembourg",
22463   "\t51\t4\t7",
22464   "\t50\t4\t7",
22465   "\t49\t4\t7",
22466   "\t48\t4\t7",
22467   "Macau",
22468   "\t23\t112\t114",
22469   "\t22\t112\t114",
22470   "\t21\t112\t114",
22471   "Macedonia",
22472   "\t43\t19\t23",
22473   "\t42\t19\t24",
22474   "\t41\t19\t24",
22475   "\t40\t19\t24",
22476   "\t39\t19\t22",
22477   "Madagascar",
22478   "\t-10\t48\t50",
22479   "\t-11\t47\t50",
22480   "\t-12\t46\t51",
22481   "\t-13\t46\t51",
22482   "\t-14\t44\t51",
22483   "\t-15\t43\t51",
22484   "\t-16\t42\t51",
22485   "\t-17\t42\t51",
22486   "\t-18\t42\t50",
22487   "\t-19\t42\t50",
22488   "\t-20\t42\t50",
22489   "\t-21\t42\t49",
22490   "\t-22\t42\t49",
22491   "\t-23\t42\t49",
22492   "\t-24\t42\t48",
22493   "\t-25\t42\t48",
22494   "\t-26\t43\t48",
22495   "Malawi",
22496   "\t-8\t31\t35",
22497   "\t-9\t31\t35",
22498   "\t-10\t31\t35",
22499   "\t-11\t31\t35",
22500   "\t-12\t31\t36",
22501   "\t-13\t31\t36",
22502   "\t-14\t31\t36",
22503   "\t-15\t31\t36",
22504   "\t-16\t33\t36",
22505   "\t-17\t33\t36",
22506   "\t-18\t34\t36",
22507   "Malaysia",
22508   "\t8\t115\t118",
22509   "\t7\t98\t103\t115\t119",
22510   "\t6\t98\t104\t114\t120",
22511   "\t5\t98\t104\t112\t120",
22512   "\t4\t99\t104\t111\t120",
22513   "\t3\t99\t105\t108\t119",
22514   "\t2\t99\t105\t108\t116",
22515   "\t1\t100\t105\t108\t116",
22516   "\t0\t101\t105\t108\t115",
22517   "\t-1\t109\t112",
22518   "Maldives",
22519   "\t8\t71\t73",
22520   "\t7\t71\t74",
22521   "\t6\t71\t74",
22522   "\t5\t71\t74",
22523   "\t4\t71\t74",
22524   "\t3\t71\t74",
22525   "\t2\t71\t74",
22526   "\t1\t71\t74",
22527   "\t0\t71\t74",
22528   "\t-1\t71\t74",
22529   "Mali",
22530   "\t25\t-7\t-2",
22531   "\t24\t-7\t0",
22532   "\t23\t-7\t1",
22533   "\t22\t-7\t2",
22534   "\t21\t-7\t3",
22535   "\t20\t-7\t5",
22536   "\t19\t-7\t5",
22537   "\t18\t-7\t5",
22538   "\t17\t-6\t5",
22539   "\t16\t-12\t5",
22540   "\t15\t-13\t5",
22541   "\t14\t-13\t4",
22542   "\t13\t-13\t1",
22543   "\t12\t-13\t-1",
22544   "\t11\t-12\t-3",
22545   "\t10\t-12\t-3",
22546   "\t9\t-9\t-4",
22547   "Malta",
22548   "\t37\t13\t15",
22549   "\t36\t13\t15",
22550   "\t35\t13\t15",
22551   "\t34\t13\t15",
22552   "Marshall Islands",
22553   "\t15\t167\t170",
22554   "\t14\t167\t170",
22555   "\t13\t167\t170",
22556   "\t12\t164\t167",
22557   "\t11\t164\t167\t169\t171",
22558   "\t10\t164\t167\t169\t171",
22559   "\t9\t166\t171",
22560   "\t8\t166\t172",
22561   "\t7\t166\t173",
22562   "\t6\t167\t173",
22563   "\t5\t167\t173",
22564   "\t4\t167\t170",
22565   "\t3\t167\t169",
22566   "Martinique",
22567   "\t15\t-62\t-59",
22568   "\t14\t-62\t-59",
22569   "\t13\t-62\t-59",
22570   "Mauritania",
22571   "\t28\t-9\t-7",
22572   "\t27\t-9\t-5",
22573   "\t26\t-13\t-3",
22574   "\t25\t-13\t-3",
22575   "\t24\t-14\t-3",
22576   "\t23\t-14\t-3",
22577   "\t22\t-18\t-5",
22578   "\t21\t-18\t-5",
22579   "\t20\t-18\t-4",
22580   "\t19\t-18\t-4",
22581   "\t18\t-17\t-4",
22582   "\t17\t-17\t-4",
22583   "\t16\t-17\t-4",
22584   "\t15\t-17\t-4",
22585   "\t14\t-17\t-4",
22586   "\t13\t-13\t-10",
22587   "Mauritius",
22588   "\t-9\t55\t57",
22589   "\t-10\t55\t57",
22590   "\t-11\t55\t57",
22591   "\t-18\t56\t58\t62\t64",
22592   "\t-19\t56\t58\t62\t64",
22593   "\t-20\t56\t58\t62\t64",
22594   "\t-21\t56\t58",
22595   "Mayotte",
22596   "\t-11\t44\t46",
22597   "\t-12\t44\t46",
22598   "\t-13\t44\t46",
22599   "Mexico",
22600   "\t33\t-118\t-112",
22601   "\t32\t-118\t-104",
22602   "\t31\t-118\t-103",
22603   "\t30\t-119\t-99",
22604   "\t29\t-119\t-98",
22605   "\t28\t-119\t-98",
22606   "\t27\t-119\t-96",
22607   "\t26\t-116\t-96",
22608   "\t25\t-116\t-96",
22609   "\t24\t-116\t-96",
22610   "\t23\t-116\t-96\t-90\t-88",
22611   "\t22\t-111\t-96\t-91\t-85",
22612   "\t21\t-111\t-95\t-91\t-85",
22613   "\t20\t-111\t-109\t-107\t-94\t-92\t-85",
22614   "\t19\t-115\t-109\t-106\t-85",
22615   "\t18\t-115\t-109\t-106\t-86",
22616   "\t17\t-115\t-109\t-105\t-86",
22617   "\t16\t-103\t-87",
22618   "\t15\t-101\t-89",
22619   "\t14\t-98\t-90",
22620   "\t13\t-93\t-91",
22621   "Micronesia",
22622   "\t10\t137\t141",
22623   "\t9\t137\t141\t148\t151",
22624   "\t8\t137\t141\t148\t152",
22625   "\t7\t148\t152\t156\t159",
22626   "\t6\t148\t154\t156\t159\t161\t164",
22627   "\t5\t148\t150\t152\t154\t156\t159\t161\t164",
22628   "\t4\t152\t158\t161\t164",
22629   "\t3\t153\t155",
22630   "\t2\t153\t155",
22631   "\t1\t153\t155",
22632   "\t0\t153\t155",
22633   "\t-1\t153\t155",
22634   "Midway Islands",
22635   "\t29\t-178\t-176",
22636   "\t28\t-178\t-176",
22637   "\t27\t-178\t-176",
22638   "Moldova",
22639   "\t49\t25\t29",
22640   "\t48\t25\t30",
22641   "\t47\t25\t31",
22642   "\t46\t26\t31",
22643   "\t45\t27\t31",
22644   "\t44\t27\t29",
22645   "Monaco",
22646   "\t44\t6\t8",
22647   "\t43\t6\t8",
22648   "\t42\t6\t8",
22649   "Mongolia",
22650   "\t53\t97\t100",
22651   "\t52\t96\t103",
22652   "\t51\t88\t108\t112\t117",
22653   "\t50\t86\t117",
22654   "\t49\t86\t119",
22655   "\t48\t86\t120",
22656   "\t47\t86\t120",
22657   "\t46\t88\t120",
22658   "\t45\t89\t120",
22659   "\t44\t89\t117",
22660   "\t43\t91\t115",
22661   "\t42\t94\t112",
22662   "\t41\t95\t111",
22663   "\t40\t102\t106",
22664   "Montenegro",
22665   "\t44\t17\t21",
22666   "\t43\t17\t21",
22667   "\t42\t17\t21",
22668   "\t41\t17\t21",
22669   "\t40\t18\t20",
22670   "Montserrat",
22671   "\t17\t-63\t-61",
22672   "\t16\t-63\t-61",
22673   "\t15\t-63\t-61",
22674   "Morocco",
22675   "\t36\t-7\t-1",
22676   "\t35\t-7\t0",
22677   "\t34\t-9\t0",
22678   "\t33\t-10\t0",
22679   "\t32\t-10\t0",
22680   "\t31\t-10\t0",
22681   "\t30\t-11\t-1",
22682   "\t29\t-13\t-2",
22683   "\t28\t-14\t-4",
22684   "\t27\t-15\t-7",
22685   "\t26\t-15\t-7",
22686   "\t25\t-16\t-8",
22687   "\t24\t-17\t-11",
22688   "\t23\t-17\t-11",
22689   "\t22\t-18\t-12",
22690   "\t21\t-18\t-13",
22691   "\t20\t-18\t-13",
22692   "Mozambique",
22693   "\t-9\t38\t41",
22694   "\t-10\t33\t41",
22695   "\t-11\t33\t41",
22696   "\t-12\t33\t41",
22697   "\t-13\t29\t41",
22698   "\t-14\t29\t41",
22699   "\t-15\t29\t41",
22700   "\t-16\t29\t41",
22701   "\t-17\t29\t41",
22702   "\t-18\t31\t39",
22703   "\t-19\t31\t37",
22704   "\t-20\t30\t36",
22705   "\t-21\t30\t36",
22706   "\t-22\t30\t36",
22707   "\t-23\t30\t36",
22708   "\t-24\t30\t36",
22709   "\t-25\t30\t36",
22710   "\t-26\t30\t34",
22711   "\t-27\t31\t33",
22712   "Myanmar",
22713   "\t29\t96\t99",
22714   "\t28\t94\t99",
22715   "\t27\t94\t99",
22716   "\t26\t93\t99",
22717   "\t25\t92\t99",
22718   "\t24\t92\t100",
22719   "\t23\t91\t100",
22720   "\t22\t91\t102",
22721   "\t21\t91\t102",
22722   "\t20\t91\t102",
22723   "\t19\t91\t101",
22724   "\t18\t91\t100",
22725   "\t17\t92\t99",
22726   "\t16\t93\t99",
22727   "\t15\t92\t99",
22728   "\t14\t92\t100",
22729   "\t13\t92\t94\t96\t100",
22730   "\t12\t96\t100",
22731   "\t11\t96\t100",
22732   "\t10\t96\t100",
22733   "\t9\t96\t100",
22734   "\t8\t97\t99",
22735   "Namibia",
22736   "\t-15\t12\t14",
22737   "\t-16\t10\t26",
22738   "\t-17\t10\t26",
22739   "\t-18\t10\t26",
22740   "\t-19\t10\t25",
22741   "\t-20\t11\t21",
22742   "\t-21\t12\t21",
22743   "\t-22\t12\t21",
22744   "\t-23\t13\t21",
22745   "\t-24\t13\t20",
22746   "\t-25\t13\t20",
22747   "\t-26\t13\t20",
22748   "\t-27\t13\t20",
22749   "\t-28\t14\t20",
22750   "\t-29\t14\t20",
22751   "Nauru",
22752   "\t1\t165\t167",
22753   "\t0\t165\t167",
22754   "\t-1\t165\t167",
22755   "Navassa Island",
22756   "\t19\t-76\t-74",
22757   "\t18\t-76\t-74",
22758   "\t17\t-76\t-74",
22759   "Nepal",
22760   "\t31\t79\t83",
22761   "\t30\t79\t85",
22762   "\t29\t79\t87",
22763   "\t28\t79\t89",
22764   "\t27\t79\t89",
22765   "\t26\t80\t89",
22766   "\t25\t83\t89",
22767   "Netherlands",
22768   "\t54\t3\t8",
22769   "\t53\t3\t8",
22770   "\t52\t2\t8",
22771   "\t51\t2\t8",
22772   "\t50\t2\t7",
22773   "\t49\t4\t7",
22774   "Netherlands Antilles",
22775   "\t18\t-64\t-61",
22776   "\t17\t-64\t-61",
22777   "\t16\t-64\t-61",
22778   "\t13\t-69\t-67",
22779   "\t12\t-69\t-67",
22780   "\t11\t-69\t-67",
22781   "New Caledonia",
22782   "\t-18\t162\t164",
22783   "\t-19\t162\t168",
22784   "\t-20\t162\t169",
22785   "\t-21\t162\t172",
22786   "\t-22\t163\t172",
22787   "\t-23\t165\t168\t170\t172",
22788   "New Zealand",
22789   "\t-7\t-173\t-171",
22790   "\t-8\t-173\t-170",
22791   "\t-9\t-173\t-170",
22792   "\t-10\t-172\t-170",
22793   "\t-28\t-178\t-176",
22794   "\t-29\t-178\t-176",
22795   "\t-30\t-178\t-176",
22796   "\t-33\t171\t174",
22797   "\t-34\t171\t175",
22798   "\t-35\t171\t176",
22799   "\t-36\t172\t179",
22800   "\t-37\t172\t179",
22801   "\t-38\t172\t179",
22802   "\t-39\t171\t179",
22803   "\t-40\t170\t178",
22804   "\t-41\t169\t177",
22805   "\t-42\t-177\t-175\t167\t177",
22806   "\t-43\t-177\t-175\t166\t175",
22807   "\t-44\t-177\t-175\t165\t174",
22808   "\t-45\t-177\t-175\t165\t172",
22809   "\t-46\t165\t172",
22810   "\t-47\t165\t171",
22811   "\t-48\t165\t169\t177\t179",
22812   "\t-49\t164\t167\t177\t179",
22813   "\t-50\t164\t167\t177\t179",
22814   "\t-51\t164\t170",
22815   "\t-52\t168\t170",
22816   "\t-53\t168\t170",
22817   "Nicaragua",
22818   "\t16\t-84\t-82",
22819   "\t15\t-87\t-81",
22820   "\t14\t-88\t-81",
22821   "\t13\t-88\t-81",
22822   "\t12\t-88\t-82",
22823   "\t11\t-88\t-82",
22824   "\t10\t-87\t-82",
22825   "\t9\t-85\t-82",
22826   "Niger",
22827   "\t24\t10\t14",
22828   "\t23\t8\t16",
22829   "\t22\t6\t16",
22830   "\t21\t5\t16",
22831   "\t20\t3\t16",
22832   "\t19\t3\t16",
22833   "\t18\t3\t16",
22834   "\t17\t2\t16",
22835   "\t16\t0\t16",
22836   "\t15\t-1\t16",
22837   "\t14\t-1\t15",
22838   "\t13\t-1\t14",
22839   "\t12\t-1\t14",
22840   "\t11\t0\t10",
22841   "\t10\t1\t4",
22842   "Nigeria",
22843   "\t14\t3\t15",
22844   "\t13\t2\t15",
22845   "\t12\t2\t15",
22846   "\t11\t2\t15",
22847   "\t10\t1\t15",
22848   "\t9\t1\t14",
22849   "\t8\t1\t14",
22850   "\t7\t1\t13",
22851   "\t6\t1\t13",
22852   "\t5\t1\t12",
22853   "\t4\t4\t10",
22854   "\t3\t4\t9",
22855   "Niue",
22856   "\t-17\t-170\t-168",
22857   "\t-18\t-170\t-168",
22858   "\t-19\t-170\t-168",
22859   "\t-20\t-170\t-168",
22860   "Norfolk Island",
22861   "\t-27\t166\t168",
22862   "\t-28\t166\t168",
22863   "\t-29\t166\t168",
22864   "\t-30\t166\t168",
22865   "North Korea",
22866   "\t44\t128\t130",
22867   "\t43\t127\t131",
22868   "\t42\t125\t131",
22869   "\t41\t123\t131",
22870   "\t40\t123\t131",
22871   "\t39\t123\t130",
22872   "\t38\t123\t129",
22873   "\t37\t123\t129",
22874   "\t36\t123\t127",
22875   "Northern Mariana Islands",
22876   "\t21\t143\t146",
22877   "\t20\t143\t146",
22878   "\t19\t143\t146",
22879   "\t18\t144\t146",
22880   "\t17\t144\t146",
22881   "\t16\t144\t146",
22882   "\t15\t144\t146",
22883   "\t14\t144\t146",
22884   "\t13\t144\t146",
22885   "Norway",
22886   "\t72\t22\t29",
22887   "\t71\t17\t32",
22888   "\t70\t14\t32",
22889   "\t69\t11\t32",
22890   "\t68\t11\t31",
22891   "\t67\t11\t26",
22892   "\t66\t10\t18",
22893   "\t65\t8\t17",
22894   "\t64\t6\t15",
22895   "\t63\t3\t15",
22896   "\t62\t3\t13",
22897   "\t61\t3\t13",
22898   "\t60\t3\t13",
22899   "\t59\t3\t13",
22900   "\t58\t4\t13",
22901   "\t57\t4\t12",
22902   "\t56\t5\t8",
22903   "Oman",
22904   "\t27\t55\t57",
22905   "\t26\t55\t57",
22906   "\t25\t54\t58",
22907   "\t24\t54\t60",
22908   "\t23\t54\t60",
22909   "\t22\t54\t60",
22910   "\t21\t54\t60",
22911   "\t20\t51\t60",
22912   "\t19\t50\t59",
22913   "\t18\t50\t58",
22914   "\t17\t50\t58",
22915   "\t16\t51\t57",
22916   "\t15\t51\t55",
22917   "Pakistan",
22918   "\t38\t73\t75",
22919   "\t37\t70\t77",
22920   "\t36\t70\t78",
22921   "\t35\t68\t78",
22922   "\t34\t68\t78",
22923   "\t33\t68\t78",
22924   "\t32\t65\t76",
22925   "\t31\t65\t76",
22926   "\t30\t59\t75",
22927   "\t29\t59\t75",
22928   "\t28\t59\t74",
22929   "\t27\t60\t73",
22930   "\t26\t60\t72",
22931   "\t25\t60\t72",
22932   "\t24\t60\t72",
22933   "\t23\t65\t72",
22934   "\t22\t66\t69",
22935   "Palau",
22936   "\t9\t133\t135",
22937   "\t8\t133\t135",
22938   "\t7\t133\t135",
22939   "\t6\t131\t135",
22940   "\t5\t131\t135",
22941   "\t4\t130\t133",
22942   "\t3\t130\t132",
22943   "\t2\t130\t132",
22944   "\t1\t130\t132",
22945   "Palmyra Atoll",
22946   "\t7\t-163\t-161",
22947   "\t6\t-163\t-161",
22948   "\t5\t-163\t-161",
22949   "\t4\t-163\t-161",
22950   "Panama",
22951   "\t10\t-83\t-76",
22952   "\t9\t-84\t-76",
22953   "\t8\t-84\t-76",
22954   "\t7\t-84\t-76",
22955   "\t6\t-82\t-76",
22956   "Papua New Guinea",
22957   "\t0\t141\t143\t145\t151",
22958   "\t-1\t139\t143\t145\t153",
22959   "\t-2\t139\t155",
22960   "\t-3\t139\t155",
22961   "\t-4\t139\t156",
22962   "\t-5\t139\t156",
22963   "\t-6\t139\t156",
22964   "\t-7\t139\t156",
22965   "\t-8\t139\t154",
22966   "\t-9\t139\t154",
22967   "\t-10\t139\t155",
22968   "\t-11\t146\t155",
22969   "\t-12\t152\t155",
22970   "Paracel Islands",
22971   "\t18\t110\t112",
22972   "\t17\t110\t113",
22973   "\t16\t110\t113",
22974   "\t15\t110\t113",
22975   "Paraguay",
22976   "\t-18\t-62\t-57",
22977   "\t-19\t-63\t-56",
22978   "\t-20\t-63\t-56",
22979   "\t-21\t-63\t-54",
22980   "\t-22\t-63\t-53",
22981   "\t-23\t-63\t-53",
22982   "\t-24\t-62\t-53",
22983   "\t-25\t-61\t-53",
22984   "\t-26\t-59\t-53",
22985   "\t-27\t-59\t-53",
22986   "\t-28\t-59\t-54",
22987   "Peru",
22988   "\t1\t-76\t-73",
22989   "\t0\t-76\t-72",
22990   "\t-1\t-78\t-69",
22991   "\t-2\t-81\t-69",
22992   "\t-3\t-82\t-68",
22993   "\t-4\t-82\t-68",
22994   "\t-5\t-82\t-68",
22995   "\t-6\t-82\t-71",
22996   "\t-7\t-82\t-71",
22997   "\t-8\t-80\t-69",
22998   "\t-9\t-80\t-68",
22999   "\t-10\t-79\t-68",
23000   "\t-11\t-79\t-67",
23001   "\t-12\t-78\t-67",
23002   "\t-13\t-78\t-67",
23003   "\t-14\t-77\t-67",
23004   "\t-15\t-77\t-67",
23005   "\t-16\t-76\t-67",
23006   "\t-17\t-75\t-67",
23007   "\t-18\t-73\t-68",
23008   "\t-19\t-71\t-68",
23009   "Philippines",
23010   "\t22\t120\t122",
23011   "\t21\t120\t123",
23012   "\t20\t120\t123",
23013   "\t19\t119\t123",
23014   "\t18\t119\t123",
23015   "\t17\t118\t123",
23016   "\t16\t118\t123",
23017   "\t15\t118\t125",
23018   "\t14\t118\t125",
23019   "\t13\t118\t126",
23020   "\t12\t118\t126",
23021   "\t11\t117\t127",
23022   "\t10\t116\t127",
23023   "\t9\t115\t127",
23024   "\t8\t115\t127",
23025   "\t7\t115\t127",
23026   "\t6\t115\t127",
23027   "\t5\t117\t127",
23028   "\t4\t118\t126",
23029   "\t3\t118\t120",
23030   "Pitcairn Islands",
23031   "\t-22\t-131\t-129",
23032   "\t-23\t-131\t-127\t-125\t-123",
23033   "\t-24\t-131\t-127\t-125\t-123",
23034   "\t-25\t-131\t-127\t-125\t-123",
23035   "\t-26\t-131\t-129",
23036   "Poland",
23037   "\t55\t13\t24",
23038   "\t54\t13\t24",
23039   "\t53\t13\t24",
23040   "\t52\t13\t24",
23041   "\t51\t13\t25",
23042   "\t50\t13\t25",
23043   "\t49\t13\t25",
23044   "\t48\t16\t24",
23045   "\t47\t21\t23",
23046   "Portugal",
23047   "\t43\t-9\t-7",
23048   "\t42\t-9\t-5",
23049   "\t41\t-9\t-5",
23050   "\t40\t-10\t-5",
23051   "\t39\t-10\t-5",
23052   "\t38\t-10\t-5",
23053   "\t37\t-10\t-5",
23054   "\t36\t-9\t-6",
23055   "\t35\t-8\t-6",
23056   "Portugal: Azores",
23057   "\t40\t-32\t-26",
23058   "\t39\t-32\t-26",
23059   "\t38\t-32\t-24",
23060   "\t37\t-29\t-24",
23061   "\t36\t-26\t-24",
23062   "\t35\t-26\t-24",
23063   "Portugal: Madeira",
23064   "\t34\t-17\t-15",
23065   "\t33\t-18\t-15",
23066   "\t32\t-18\t-15",
23067   "\t31\t-18\t-14",
23068   "\t30\t-17\t-14",
23069   "\t29\t-17\t-14",
23070   "Puerto Rico",
23071   "\t19\t-68\t-64",
23072   "\t18\t-68\t-64",
23073   "\t17\t-68\t-64",
23074   "\t16\t-68\t-64",
23075   "Qatar",
23076   "\t27\t50\t52",
23077   "\t26\t49\t52",
23078   "\t25\t49\t52",
23079   "\t24\t49\t52",
23080   "\t23\t49\t52",
23081   "Republic of the Congo",
23082   "\t4\t15\t19",
23083   "\t3\t12\t19",
23084   "\t2\t12\t19",
23085   "\t1\t12\t19",
23086   "\t0\t11\t19",
23087   "\t-1\t10\t18",
23088   "\t-2\t10\t18",
23089   "\t-3\t10\t17",
23090   "\t-4\t10\t17",
23091   "\t-5\t10\t16",
23092   "\t-6\t10\t13",
23093   "Reunion",
23094   "\t-19\t54\t56",
23095   "\t-20\t54\t56",
23096   "\t-21\t54\t56",
23097   "\t-22\t54\t56",
23098   "Romania",
23099   "\t49\t21\t28",
23100   "\t48\t20\t29",
23101   "\t47\t19\t29",
23102   "\t46\t19\t30",
23103   "\t45\t19\t30",
23104   "\t44\t19\t30",
23105   "\t43\t20\t30",
23106   "\t42\t21\t29",
23107   "Russia",
23108   "\t82\t49\t51\t53\t66\t88\t97",
23109   "\t81\t35\t37\t43\t66\t77\t81\t88\t100",
23110   "\t80\t35\t37\t43\t66\t75\t81\t88\t105",
23111   "\t79\t35\t37\t43\t66\t75\t81\t89\t108",
23112   "\t78\t49\t52\t57\t60\t66\t68\t75\t78\t87\t108\t155\t157",
23113   "\t77\t59\t70\t87\t114\t136\t143\t147\t153\t155\t157",
23114   "\t76\t54\t70\t80\t114\t134\t153\t155\t157",
23115   "\t75\t53\t70\t78\t117\t134\t153",
23116   "\t74\t52\t130\t134\t151",
23117   "\t73\t50\t61\t67\t130\t134\t151",
23118   "\t72\t-180\t-174\t50\t59\t65\t159\t177\t180",
23119   "\t71\t-180\t-174\t50\t61\t65\t163\t167\t172\t177\t180",
23120   "\t70\t-180\t-174\t27\t37\t47\t180",
23121   "\t69\t-180\t-175\t27\t180",
23122   "\t68\t-180\t-171\t27\t180",
23123   "\t67\t-180\t-168\t27\t180",
23124   "\t66\t-180\t-167\t28\t180",
23125   "\t65\t-180\t-167\t28\t180",
23126   "\t64\t-180\t-167\t28\t180",
23127   "\t63\t-176\t-171\t28\t180",
23128   "\t62\t27\t180",
23129   "\t61\t25\t180",
23130   "\t60\t25\t175",
23131   "\t59\t25\t173",
23132   "\t58\t26\t167\t169\t171",
23133   "\t57\t26\t143\t150\t165",
23134   "\t56\t19\t23\t26\t141\t154\t167",
23135   "\t55\t18\t23\t26\t144\t154\t169",
23136   "\t54\t18\t23\t27\t144\t154\t169",
23137   "\t53\t18\t23\t29\t144\t154\t163\t165\t169",
23138   "\t52\t30\t63\t71\t144\t154\t161",
23139   "\t51\t30\t63\t77\t121\t124\t145\t154\t159",
23140   "\t50\t33\t62\t78\t121\t125\t145\t153\t159",
23141   "\t49\t34\t50\t53\t62\t78\t99\t101\t120\t126\t145\t152\t157",
23142   "\t48\t36\t49\t83\t90\t94\t98\t106\t120\t126\t145\t151\t156",
23143   "\t47\t36\t50\t129\t145\t149\t155",
23144   "\t46\t35\t50\t129\t144\t146\t154",
23145   "\t45\t35\t50\t129\t153",
23146   "\t44\t35\t49\t129\t138\t141\t151",
23147   "\t43\t36\t49\t129\t137\t144\t148",
23148   "\t42\t38\t49\t129\t136\t144\t147",
23149   "\t41\t42\t49\t129\t135",
23150   "\t40\t45\t49",
23151   "Rwanda",
23152   "\t0\t28\t31",
23153   "\t-1\t27\t31",
23154   "\t-2\t27\t31",
23155   "\t-3\t27\t31",
23156   "Saint Helena",
23157   "\t-6\t-15\t-13",
23158   "\t-7\t-15\t-13",
23159   "\t-8\t-15\t-13",
23160   "\t-14\t-6\t-4",
23161   "\t-15\t-6\t-4",
23162   "\t-16\t-6\t-4",
23163   "\t-17\t-6\t-4",
23164   "\t-36\t-13\t-11",
23165   "\t-37\t-13\t-11",
23166   "\t-38\t-13\t-11",
23167   "\t-39\t-11\t-8",
23168   "\t-40\t-11\t-8",
23169   "\t-41\t-11\t-8",
23170   "Saint Kitts and Nevis",
23171   "\t18\t-63\t-61",
23172   "\t17\t-63\t-61",
23173   "\t16\t-63\t-61",
23174   "Saint Lucia",
23175   "\t15\t-62\t-59",
23176   "\t14\t-62\t-59",
23177   "\t13\t-62\t-59",
23178   "\t12\t-62\t-59",
23179   "Saint Pierre and Miquelon",
23180   "\t48\t-57\t-55",
23181   "\t47\t-57\t-55",
23182   "\t46\t-57\t-55",
23183   "\t45\t-57\t-55",
23184   "Saint Vincent and the Grenadines",
23185   "\t14\t-62\t-60",
23186   "\t13\t-62\t-60",
23187   "\t12\t-62\t-60",
23188   "\t11\t-62\t-60",
23189   "Samoa",
23190   "\t-12\t-173\t-170",
23191   "\t-13\t-173\t-170",
23192   "\t-14\t-173\t-170",
23193   "\t-15\t-172\t-170",
23194   "San Marino",
23195   "\t44\t11\t13",
23196   "\t43\t11\t13",
23197   "\t42\t11\t13",
23198   "Sao Tome and Principe",
23199   "\t2\t6\t8",
23200   "\t1\t5\t8",
23201   "\t0\t5\t8",
23202   "\t-1\t5\t7",
23203   "Saudi Arabia",
23204   "\t33\t37\t40",
23205   "\t32\t35\t43",
23206   "\t31\t35\t44",
23207   "\t30\t33\t48",
23208   "\t29\t33\t49",
23209   "\t28\t33\t50",
23210   "\t27\t33\t51",
23211   "\t26\t34\t51",
23212   "\t25\t34\t52",
23213   "\t24\t35\t53",
23214   "\t23\t36\t56",
23215   "\t22\t37\t56",
23216   "\t21\t37\t56",
23217   "\t20\t37\t56",
23218   "\t19\t38\t56",
23219   "\t18\t39\t55",
23220   "\t17\t40\t52",
23221   "\t16\t40\t48",
23222   "\t15\t40\t48",
23223   "Senegal",
23224   "\t17\t-17\t-12",
23225   "\t16\t-17\t-11",
23226   "\t15\t-18\t-10",
23227   "\t14\t-18\t-10",
23228   "\t13\t-18\t-10",
23229   "\t12\t-17\t-10",
23230   "\t11\t-17\t-10",
23231   "Serbia",
23232   "\t47\t18\t21",
23233   "\t46\t17\t22",
23234   "\t45\t17\t23",
23235   "\t44\t17\t23",
23236   "\t43\t17\t23",
23237   "\t42\t18\t23",
23238   "\t41\t19\t23",
23239   "Seychelles",
23240   "\t-2\t54\t56",
23241   "\t-3\t54\t56",
23242   "\t-4\t52\t56",
23243   "\t-5\t51\t56",
23244   "\t-6\t51\t57",
23245   "\t-7\t51\t53\t55\t57",
23246   "\t-8\t45\t48\t51\t53\t55\t57",
23247   "\t-9\t45\t48",
23248   "\t-10\t45\t48",
23249   "Sierra Leone",
23250   "\t10\t-14\t-9",
23251   "\t9\t-14\t-9",
23252   "\t8\t-14\t-9",
23253   "\t7\t-14\t-9",
23254   "\t6\t-13\t-9",
23255   "\t5\t-12\t-10",
23256   "Singapore",
23257   "\t2\t102\t105",
23258   "\t1\t102\t105",
23259   "\t0\t102\t105",
23260   "Sint Maarten",
23261   "\t19\t-64\t-62",
23262   "\t18\t-64\t-62",
23263   "\t17\t-64\t-62",
23264   "Slovakia",
23265   "\t50\t16\t23",
23266   "\t49\t15\t23",
23267   "\t48\t15\t23",
23268   "\t47\t15\t23",
23269   "\t46\t16\t19",
23270   "Slovenia",
23271   "\t47\t12\t17",
23272   "\t46\t12\t17",
23273   "\t45\t12\t17",
23274   "\t44\t12\t16",
23275   "Solomon Islands",
23276   "\t-5\t154\t158",
23277   "\t-6\t154\t161",
23278   "\t-7\t154\t163",
23279   "\t-8\t154\t163\t166\t168",
23280   "\t-9\t155\t168",
23281   "\t-10\t158\t168",
23282   "\t-11\t158\t169",
23283   "\t-12\t158\t161\t165\t169",
23284   "\t-13\t167\t169",
23285   "Somalia",
23286   "\t12\t47\t52",
23287   "\t11\t47\t52",
23288   "\t10\t47\t52",
23289   "\t9\t47\t52",
23290   "\t8\t45\t51",
23291   "\t7\t44\t51",
23292   "\t6\t44\t50",
23293   "\t5\t40\t50",
23294   "\t4\t40\t49",
23295   "\t3\t39\t49",
23296   "\t2\t39\t48",
23297   "\t1\t39\t47",
23298   "\t0\t39\t46",
23299   "\t-1\t39\t44",
23300   "\t-2\t40\t42",
23301   "Somalia",
23302   "\t12\t42\t44\t46\t49",
23303   "\t11\t41\t49",
23304   "\t10\t41\t49",
23305   "\t9\t41\t49",
23306   "\t8\t42\t49",
23307   "\t7\t43\t49",
23308   "\t6\t45\t48",
23309   "South Africa",
23310   "\t-21\t26\t32",
23311   "\t-22\t25\t32",
23312   "\t-23\t18\t21\t24\t32",
23313   "\t-24\t18\t32",
23314   "\t-25\t18\t33",
23315   "\t-26\t18\t33",
23316   "\t-27\t15\t33",
23317   "\t-28\t15\t33",
23318   "\t-29\t15\t33",
23319   "\t-30\t15\t32",
23320   "\t-31\t16\t31",
23321   "\t-32\t16\t31",
23322   "\t-33\t16\t30",
23323   "\t-34\t16\t28",
23324   "\t-35\t17\t26",
23325   "\t-45\t36\t38",
23326   "\t-46\t36\t38",
23327   "\t-47\t36\t38",
23328   "South Georgia and the South Sandwich Islands",
23329   "\t-52\t-39\t-36",
23330   "\t-53\t-39\t-34",
23331   "\t-54\t-39\t-34",
23332   "\t-55\t-39\t-34\t-29\t-26",
23333   "\t-56\t-29\t-25",
23334   "\t-57\t-29\t-25",
23335   "\t-58\t-28\t-25",
23336   "\t-59\t-28\t-25",
23337   "\t-60\t-28\t-25",
23338   "South Korea",
23339   "\t39\t125\t129",
23340   "\t38\t123\t132",
23341   "\t37\t123\t132",
23342   "\t36\t123\t132",
23343   "\t35\t124\t130",
23344   "\t34\t124\t130",
23345   "\t33\t124\t129",
23346   "\t32\t125\t127",
23347   "South Sudan",
23348   "\t13\t31\t34",
23349   "\t12\t31\t34",
23350   "\t11\t23\t34",
23351   "\t10\t23\t35",
23352   "\t9\t23\t35",
23353   "\t8\t23\t35",
23354   "\t7\t23\t35",
23355   "\t6\t23\t36",
23356   "\t5\t25\t36",
23357   "\t4\t25\t36",
23358   "\t3\t26\t36",
23359   "\t2\t29\t34",
23360   "Spain",
23361   "\t44\t-10\t0",
23362   "\t43\t-10\t4",
23363   "\t42\t-10\t4",
23364   "\t41\t-10\t5",
23365   "\t40\t-9\t5",
23366   "\t39\t-8\t5",
23367   "\t38\t-8\t5",
23368   "\t37\t-8\t2",
23369   "\t36\t-8\t1",
23370   "\t35\t-7\t0",
23371   "\t34\t-6\t-1",
23372   "Spain: Canary Islands",
23373   "\t30\t-14\t-12",
23374   "\t29\t-19\t-12",
23375   "\t28\t-19\t-12",
23376   "\t27\t-19\t-12",
23377   "\t26\t-19\t-14",
23378   "Spratly Islands",
23379   "\t12\t113\t115",
23380   "\t11\t112\t115",
23381   "\t10\t112\t116",
23382   "\t9\t110\t116",
23383   "\t8\t110\t116",
23384   "\t7\t110\t116",
23385   "\t6\t112\t114",
23386   "\t5\t112\t114",
23387   "Sri Lanka",
23388   "\t10\t78\t81",
23389   "\t9\t78\t82",
23390   "\t8\t78\t82",
23391   "\t7\t78\t82",
23392   "\t6\t78\t82",
23393   "\t5\t78\t82",
23394   "\t4\t79\t81",
23395   "Sudan",
23396   "\t23\t30\t32",
23397   "\t22\t23\t38",
23398   "\t21\t23\t38",
23399   "\t20\t22\t38",
23400   "\t19\t22\t39",
23401   "\t18\t22\t39",
23402   "\t17\t22\t39",
23403   "\t16\t21\t39",
23404   "\t15\t21\t38",
23405   "\t14\t20\t37",
23406   "\t13\t20\t37",
23407   "\t12\t20\t37",
23408   "\t11\t20\t37",
23409   "\t10\t21\t36",
23410   "\t9\t21\t35",
23411   "\t8\t22\t35",
23412   "\t7\t22\t25",
23413   "Suriname",
23414   "\t7\t-57\t-55",
23415   "\t6\t-58\t-52",
23416   "\t5\t-59\t-52",
23417   "\t4\t-59\t-52",
23418   "\t3\t-59\t-52",
23419   "\t2\t-59\t-52",
23420   "\t1\t-58\t-53",
23421   "\t0\t-57\t-54",
23422   "Svalbard",
23423   "\t81\t15\t28\t30\t34",
23424   "\t80\t9\t34",
23425   "\t79\t9\t34",
23426   "\t78\t9\t31",
23427   "\t77\t9\t31",
23428   "\t76\t12\t26",
23429   "\t75\t14\t20\t23\t26",
23430   "\t74\t17\t20",
23431   "\t73\t17\t20",
23432   "Swaziland",
23433   "\t-24\t30\t33",
23434   "\t-25\t29\t33",
23435   "\t-26\t29\t33",
23436   "\t-27\t29\t33",
23437   "\t-28\t29\t32",
23438   "Sweden",
23439   "\t70\t19\t21",
23440   "\t69\t16\t24",
23441   "\t68\t15\t24",
23442   "\t67\t13\t25",
23443   "\t66\t13\t25",
23444   "\t65\t11\t25",
23445   "\t64\t10\t25",
23446   "\t63\t10\t22",
23447   "\t62\t10\t21",
23448   "\t61\t11\t19",
23449   "\t60\t10\t20",
23450   "\t59\t10\t20",
23451   "\t58\t10\t20",
23452   "\t57\t10\t20",
23453   "\t56\t10\t20",
23454   "\t55\t11\t19",
23455   "\t54\t11\t15",
23456   "Switzerland",
23457   "\t48\t5\t10",
23458   "\t47\t4\t11",
23459   "\t46\t4\t11",
23460   "\t45\t4\t11",
23461   "\t44\t5\t10",
23462   "Syria",
23463   "\t38\t39\t43",
23464   "\t37\t35\t43",
23465   "\t36\t34\t43",
23466   "\t35\t34\t43",
23467   "\t34\t34\t42",
23468   "\t33\t34\t42",
23469   "\t32\t34\t40",
23470   "\t31\t34\t39",
23471   "Taiwan",
23472   "\t26\t120\t123",
23473   "\t25\t117\t123",
23474   "\t24\t117\t123",
23475   "\t23\t117\t122",
23476   "\t22\t118\t122",
23477   "\t21\t119\t122",
23478   "\t20\t119\t121",
23479   "Tajikistan",
23480   "\t42\t69\t71",
23481   "\t41\t67\t71",
23482   "\t40\t66\t74",
23483   "\t39\t66\t75",
23484   "\t38\t66\t76",
23485   "\t37\t66\t76",
23486   "\t36\t66\t76",
23487   "\t35\t66\t73",
23488   "Tanzania",
23489   "\t1\t29\t31",
23490   "\t0\t29\t36",
23491   "\t-1\t29\t38",
23492   "\t-2\t29\t39",
23493   "\t-3\t28\t40",
23494   "\t-4\t28\t40",
23495   "\t-5\t28\t40",
23496   "\t-6\t28\t40",
23497   "\t-7\t28\t40",
23498   "\t-8\t29\t40",
23499   "\t-9\t29\t41",
23500   "\t-10\t30\t41",
23501   "\t-11\t33\t41",
23502   "\t-12\t33\t40",
23503   "Thailand",
23504   "\t21\t98\t101",
23505   "\t20\t96\t102",
23506   "\t19\t96\t105",
23507   "\t18\t96\t105",
23508   "\t17\t96\t106",
23509   "\t16\t96\t106",
23510   "\t15\t97\t106",
23511   "\t14\t97\t106",
23512   "\t13\t97\t106",
23513   "\t12\t97\t103",
23514   "\t11\t97\t103",
23515   "\t10\t96\t103",
23516   "\t9\t96\t101",
23517   "\t8\t96\t101",
23518   "\t7\t97\t103",
23519   "\t6\t97\t103",
23520   "\t5\t98\t103",
23521   "\t4\t99\t102",
23522   "Timor-Leste",
23523   "\t-7\t123\t128",
23524   "\t-8\t123\t128",
23525   "\t-9\t123\t128",
23526   "\t-10\t123\t127",
23527   "Togo",
23528   "\t12\t-1\t1",
23529   "\t11\t-1\t2",
23530   "\t10\t-1\t2",
23531   "\t9\t-1\t2",
23532   "\t8\t-1\t2",
23533   "\t7\t-1\t2",
23534   "\t6\t-1\t2",
23535   "\t5\t-1\t2",
23536   "Tokelau",
23537   "\t-8\t-172\t-170",
23538   "\t-9\t-172\t-170",
23539   "\t-10\t-172\t-170",
23540   "Tonga",
23541   "\t-14\t-176\t-174",
23542   "\t-15\t-176\t-174",
23543   "\t-16\t-176\t-174",
23544   "\t-17\t-175\t-172",
23545   "\t-18\t-176\t-172",
23546   "\t-19\t-176\t-172",
23547   "\t-20\t-176\t-173",
23548   "\t-21\t-177\t-173",
23549   "\t-22\t-177\t-173",
23550   "\t-23\t-177\t-175",
23551   "Trinidad and Tobago",
23552   "\t12\t-61\t-59",
23553   "\t11\t-62\t-59",
23554   "\t10\t-62\t-59",
23555   "\t9\t-62\t-59",
23556   "Tromelin Island",
23557   "\t-14\t53\t55",
23558   "\t-15\t53\t55",
23559   "\t-16\t53\t55",
23560   "Tunisia",
23561   "\t38\t7\t12",
23562   "\t37\t7\t12",
23563   "\t36\t7\t12",
23564   "\t35\t6\t12",
23565   "\t34\t6\t12",
23566   "\t33\t6\t12",
23567   "\t32\t6\t12",
23568   "\t31\t7\t12",
23569   "\t30\t8\t11",
23570   "\t29\t8\t11",
23571   "Turkey",
23572   "\t43\t25\t28\t32\t36",
23573   "\t42\t25\t44",
23574   "\t41\t24\t45",
23575   "\t40\t24\t45",
23576   "\t39\t24\t45",
23577   "\t38\t24\t45",
23578   "\t37\t25\t45",
23579   "\t36\t26\t45",
23580   "\t35\t26\t41\t43\t45",
23581   "\t34\t34\t37",
23582   "Turkmenistan",
23583   "\t43\t51\t61",
23584   "\t42\t51\t62",
23585   "\t41\t51\t63",
23586   "\t40\t51\t64",
23587   "\t39\t51\t67",
23588   "\t38\t51\t67",
23589   "\t37\t52\t67",
23590   "\t36\t52\t67",
23591   "\t35\t59\t65",
23592   "\t34\t60\t65",
23593   "Turks and Caicos Islands",
23594   "\t22\t-73\t-70",
23595   "\t21\t-73\t-70",
23596   "\t20\t-73\t-70",
23597   "Tuvalu",
23598   "\t-4\t175\t177",
23599   "\t-5\t175\t178",
23600   "\t-6\t175\t179",
23601   "\t-7\t175\t180",
23602   "\t-8\t176\t180",
23603   "\t-9\t177\t180",
23604   "\t-10\t178\t180",
23605   "Uganda",
23606   "\t5\t32\t35",
23607   "\t4\t29\t35",
23608   "\t3\t29\t35",
23609   "\t2\t29\t36",
23610   "\t1\t28\t36",
23611   "\t0\t28\t36",
23612   "\t-1\t28\t35",
23613   "\t-2\t28\t34",
23614   "Ukraine",
23615   "\t53\t29\t35",
23616   "\t52\t22\t36",
23617   "\t51\t22\t39",
23618   "\t50\t21\t41",
23619   "\t49\t21\t41",
23620   "\t48\t21\t41",
23621   "\t47\t21\t41",
23622   "\t46\t21\t40",
23623   "\t45\t27\t38",
23624   "\t44\t27\t37",
23625   "\t43\t32\t36",
23626   "United Arab Emirates",
23627   "\t27\t55\t57",
23628   "\t26\t54\t57",
23629   "\t25\t50\t57",
23630   "\t24\t50\t57",
23631   "\t23\t50\t57",
23632   "\t22\t50\t56",
23633   "\t21\t51\t56",
23634   "United Kingdom",
23635   "\t61\t-3\t1",
23636   "\t60\t-4\t1",
23637   "\t59\t-8\t1",
23638   "\t58\t-14\t-12\t-9\t0",
23639   "\t57\t-14\t-12\t-9\t0",
23640   "\t56\t-14\t-12\t-9\t0",
23641   "\t55\t-9\t1",
23642   "\t54\t-9\t1",
23643   "\t53\t-9\t2",
23644   "\t52\t-6\t2",
23645   "\t51\t-6\t2",
23646   "\t50\t-7\t2",
23647   "\t49\t-7\t1",
23648   "\t48\t-7\t-4",
23649   "Uruguay",
23650   "\t-29\t-58\t-54",
23651   "\t-30\t-59\t-52",
23652   "\t-31\t-59\t-52",
23653   "\t-32\t-59\t-52",
23654   "\t-33\t-59\t-52",
23655   "\t-34\t-59\t-52",
23656   "\t-35\t-59\t-52",
23657   "USA",
23658   "\t72\t-158\t-153",
23659   "\t71\t-163\t-141",
23660   "\t70\t-164\t-140",
23661   "\t69\t-167\t-140",
23662   "\t68\t-167\t-140",
23663   "\t67\t-167\t-140",
23664   "\t66\t-169\t-140",
23665   "\t65\t-169\t-140",
23666   "\t64\t-172\t-140",
23667   "\t63\t-172\t-140",
23668   "\t62\t-172\t-140",
23669   "\t61\t-174\t-138",
23670   "\t60\t-174\t-171\t-168\t-133",
23671   "\t59\t-174\t-171\t-168\t-132",
23672   "\t58\t-171\t-131",
23673   "\t57\t-171\t-168\t-163\t-150\t-138\t-129",
23674   "\t56\t-171\t-168\t-164\t-151\t-137\t-128",
23675   "\t55\t-170\t-152\t-136\t-128",
23676   "\t54\t-170\t-154\t-135\t-128\t171\t173",
23677   "\t53\t-177\t-158\t-134\t-129\t171\t180",
23678   "\t52\t-180\t-165\t171\t180",
23679   "\t51\t-180\t-167\t171\t180",
23680   "\t50\t-180\t-174\t-96\t-93\t176\t180",
23681   "\t49\t-125\t-86",
23682   "\t48\t-125\t-84\t-70\t-66",
23683   "\t47\t-125\t-82\t-71\t-66",
23684   "\t46\t-125\t-81\t-75\t-65",
23685   "\t45\t-125\t-81\t-77\t-65",
23686   "\t44\t-125\t-65",
23687   "\t43\t-125\t-65",
23688   "\t42\t-125\t-68",
23689   "\t41\t-125\t-68",
23690   "\t40\t-125\t-68",
23691   "\t39\t-125\t-71",
23692   "\t38\t-125\t-73",
23693   "\t37\t-124\t-73",
23694   "\t36\t-124\t-74",
23695   "\t35\t-123\t-74",
23696   "\t34\t-122\t-74",
23697   "\t33\t-121\t-75",
23698   "\t32\t-121\t-76",
23699   "\t31\t-119\t-78",
23700   "\t30\t-114\t-79",
23701   "\t29\t-179\t-177\t-106\t-79",
23702   "\t28\t-179\t-177\t-105\t-79",
23703   "\t27\t-179\t-177\t-174\t-172\t-104\t-94\t-90\t-88\t-83\t-79",
23704   "\t26\t-174\t-166\t-100\t-95\t-83\t-79",
23705   "\t25\t-174\t-166\t-100\t-96\t-83\t-79",
23706   "\t24\t-172\t-160\t-98\t-96\t-83\t-79",
23707   "\t23\t-165\t-158\t-83\t-79",
23708   "\t22\t-165\t-155",
23709   "\t21\t-161\t-154",
23710   "\t20\t-161\t-153",
23711   "\t19\t-158\t-153",
23712   "\t18\t-157\t-153",
23713   "\t17\t-156\t-154",
23714   "USA: Alabama",
23715   "\t36\t-89\t-86",
23716   "\t35\t-89\t-84",
23717   "\t34\t-89\t-84",
23718   "\t33\t-89\t-83",
23719   "\t32\t-89\t-83",
23720   "\t31\t-89\t-83",
23721   "\t30\t-89\t-83",
23722   "\t29\t-89\t-86",
23723   "USA: Alaska",
23724   "\t72\t-158\t-153",
23725   "\t71\t-163\t-141",
23726   "\t70\t-164\t-140",
23727   "\t69\t-167\t-140",
23728   "\t68\t-167\t-140",
23729   "\t67\t-167\t-140",
23730   "\t66\t-169\t-140",
23731   "\t65\t-169\t-140",
23732   "\t64\t-172\t-140",
23733   "\t63\t-172\t-140",
23734   "\t62\t-172\t-140",
23735   "\t61\t-174\t-138",
23736   "\t60\t-174\t-171\t-168\t-133",
23737   "\t59\t-174\t-171\t-168\t-132",
23738   "\t58\t-171\t-131",
23739   "\t57\t-171\t-168\t-163\t-150\t-138\t-129",
23740   "\t56\t-171\t-168\t-164\t-151\t-137\t-128",
23741   "\t55\t-170\t-152\t-136\t-128",
23742   "\t54\t-170\t-154\t-135\t-128\t171\t173",
23743   "\t53\t-177\t-158\t-134\t-129\t171\t180",
23744   "\t52\t-180\t-165\t171\t180",
23745   "\t51\t-180\t-167\t171\t180",
23746   "\t50\t-180\t-174\t176\t180",
23747   "USA: Alaska, Aleutian Islands",
23748   "\t60\t-154\t-149\t-147\t-145",
23749   "\t59\t-162\t-158\t-154\t-149\t-147\t-145",
23750   "\t58\t-171\t-169\t-162\t-149\t-147\t-145",
23751   "\t57\t-171\t-168\t-162\t-150",
23752   "\t56\t-171\t-168\t-164\t-151",
23753   "\t55\t-170\t-152",
23754   "\t54\t-170\t-154",
23755   "\t53\t-177\t-158",
23756   "\t52\t-180\t-165",
23757   "\t51\t-180\t-167",
23758   "\t50\t-180\t-174",
23759   "USA: Arizona",
23760   "\t38\t-115\t-108",
23761   "\t37\t-115\t-108",
23762   "\t36\t-115\t-108",
23763   "\t35\t-115\t-108",
23764   "\t34\t-115\t-108",
23765   "\t33\t-115\t-108",
23766   "\t32\t-115\t-108",
23767   "\t31\t-115\t-108",
23768   "\t30\t-114\t-108",
23769   "USA: Arkansas",
23770   "\t37\t-95\t-88",
23771   "\t36\t-95\t-88",
23772   "\t35\t-95\t-88",
23773   "\t34\t-95\t-88",
23774   "\t33\t-95\t-89",
23775   "\t32\t-95\t-90",
23776   "USA: California",
23777   "\t43\t-125\t-119",
23778   "\t42\t-125\t-119",
23779   "\t41\t-125\t-119",
23780   "\t40\t-125\t-119",
23781   "\t39\t-125\t-117",
23782   "\t38\t-125\t-116",
23783   "\t37\t-124\t-115",
23784   "\t36\t-124\t-113",
23785   "\t35\t-123\t-113",
23786   "\t34\t-122\t-113",
23787   "\t33\t-121\t-113",
23788   "\t32\t-121\t-113",
23789   "\t31\t-119\t-113",
23790   "USA: Colorado",
23791   "\t42\t-110\t-101",
23792   "\t41\t-110\t-101",
23793   "\t40\t-110\t-101",
23794   "\t39\t-110\t-101",
23795   "\t38\t-110\t-101",
23796   "\t37\t-110\t-101",
23797   "\t36\t-110\t-101",
23798   "USA: Connecticut",
23799   "\t43\t-74\t-70",
23800   "\t42\t-74\t-70",
23801   "\t41\t-74\t-70",
23802   "\t40\t-74\t-70",
23803   "USA: Delaware",
23804   "\t40\t-76\t-74",
23805   "\t39\t-76\t-74",
23806   "\t38\t-76\t-74",
23807   "\t37\t-76\t-74",
23808   "USA: District of Columbia",
23809   "\t39\t-78\t-75",
23810   "\t38\t-78\t-75",
23811   "\t37\t-78\t-75",
23812   "USA: Florida",
23813   "\t32\t-88\t-84",
23814   "\t31\t-88\t-80",
23815   "\t30\t-88\t-79",
23816   "\t29\t-88\t-79",
23817   "\t28\t-86\t-79",
23818   "\t27\t-83\t-79",
23819   "\t26\t-83\t-79",
23820   "\t25\t-83\t-79",
23821   "\t24\t-83\t-79",
23822   "\t23\t-83\t-79",
23823   "USA: Georgia",
23824   "\t36\t-84\t-82",
23825   "\t35\t-86\t-81",
23826   "\t34\t-86\t-80",
23827   "\t33\t-86\t-79",
23828   "\t32\t-86\t-79",
23829   "\t31\t-86\t-79",
23830   "\t30\t-86\t-79",
23831   "\t29\t-86\t-80",
23832   "USA: Hawaii",
23833   "\t29\t-179\t-177",
23834   "\t28\t-179\t-174",
23835   "\t27\t-179\t-172",
23836   "\t26\t-176\t-166",
23837   "\t25\t-174\t-166",
23838   "\t24\t-172\t-160",
23839   "\t23\t-167\t-158",
23840   "\t22\t-167\t-155",
23841   "\t21\t-161\t-154",
23842   "\t20\t-161\t-153",
23843   "\t19\t-158\t-153",
23844   "\t18\t-157\t-153",
23845   "\t17\t-156\t-154",
23846   "USA: Idaho",
23847   "\t49\t-118\t-115",
23848   "\t48\t-118\t-114",
23849   "\t47\t-118\t-113",
23850   "\t46\t-118\t-112",
23851   "\t45\t-118\t-110",
23852   "\t44\t-118\t-110",
23853   "\t43\t-118\t-110",
23854   "\t42\t-118\t-110",
23855   "\t41\t-118\t-110",
23856   "USA: Illinois",
23857   "\t43\t-91\t-86",
23858   "\t42\t-92\t-86",
23859   "\t41\t-92\t-86",
23860   "\t40\t-92\t-86",
23861   "\t39\t-92\t-86",
23862   "\t38\t-92\t-86",
23863   "\t37\t-91\t-86",
23864   "\t36\t-91\t-87",
23865   "\t35\t-90\t-88",
23866   "USA: Indiana",
23867   "\t42\t-88\t-83",
23868   "\t41\t-88\t-83",
23869   "\t40\t-88\t-83",
23870   "\t39\t-89\t-83",
23871   "\t38\t-89\t-83",
23872   "\t37\t-89\t-83",
23873   "\t36\t-89\t-85",
23874   "USA: Iowa",
23875   "\t44\t-97\t-90",
23876   "\t43\t-97\t-89",
23877   "\t42\t-97\t-89",
23878   "\t41\t-97\t-89",
23879   "\t40\t-97\t-89",
23880   "\t39\t-96\t-89",
23881   "USA: Kansas",
23882   "\t41\t-103\t-94",
23883   "\t40\t-103\t-93",
23884   "\t39\t-103\t-93",
23885   "\t38\t-103\t-93",
23886   "\t37\t-103\t-93",
23887   "\t36\t-103\t-93",
23888   "USA: Kentucky",
23889   "\t40\t-85\t-83",
23890   "\t39\t-87\t-81",
23891   "\t38\t-90\t-80",
23892   "\t37\t-90\t-80",
23893   "\t36\t-90\t-80",
23894   "\t35\t-90\t-81",
23895   "USA: Louisiana",
23896   "\t34\t-95\t-90",
23897   "\t33\t-95\t-89",
23898   "\t32\t-95\t-88",
23899   "\t31\t-95\t-87",
23900   "\t30\t-95\t-87",
23901   "\t29\t-94\t-87",
23902   "\t28\t-94\t-87",
23903   "\t27\t-90\t-88",
23904   "USA: Maine",
23905   "\t48\t-70\t-66",
23906   "\t47\t-71\t-66",
23907   "\t46\t-72\t-65",
23908   "\t45\t-72\t-65",
23909   "\t44\t-72\t-65",
23910   "\t43\t-72\t-65",
23911   "\t42\t-71\t-67",
23912   "\t41\t-71\t-69",
23913   "USA: Maryland",
23914   "\t40\t-80\t-74",
23915   "\t39\t-80\t-74",
23916   "\t38\t-80\t-74",
23917   "\t37\t-78\t-74",
23918   "\t36\t-77\t-74",
23919   "USA: Massachusetts",
23920   "\t43\t-74\t-69",
23921   "\t42\t-74\t-68",
23922   "\t41\t-74\t-68",
23923   "\t40\t-72\t-68",
23924   "USA: Michigan",
23925   "\t49\t-90\t-86",
23926   "\t48\t-91\t-84",
23927   "\t47\t-91\t-82",
23928   "\t46\t-91\t-81",
23929   "\t45\t-91\t-81",
23930   "\t44\t-89\t-81",
23931   "\t43\t-88\t-81",
23932   "\t42\t-88\t-81",
23933   "\t41\t-88\t-81",
23934   "\t40\t-88\t-82",
23935   "USA: Minnesota",
23936   "\t50\t-96\t-93",
23937   "\t49\t-98\t-88",
23938   "\t48\t-98\t-88",
23939   "\t47\t-98\t-88",
23940   "\t46\t-98\t-88",
23941   "\t45\t-97\t-90",
23942   "\t44\t-97\t-90",
23943   "\t43\t-97\t-90",
23944   "\t42\t-97\t-90",
23945   "USA: Mississippi",
23946   "\t36\t-91\t-88",
23947   "\t35\t-92\t-87",
23948   "\t34\t-92\t-87",
23949   "\t33\t-92\t-87",
23950   "\t32\t-92\t-87",
23951   "\t31\t-92\t-87",
23952   "\t30\t-92\t-87",
23953   "\t29\t-90\t-87",
23954   "USA: Missouri",
23955   "\t41\t-96\t-90",
23956   "\t40\t-96\t-89",
23957   "\t39\t-96\t-89",
23958   "\t38\t-96\t-88",
23959   "\t37\t-95\t-88",
23960   "\t36\t-95\t-88",
23961   "\t35\t-95\t-88",
23962   "\t34\t-91\t-88",
23963   "USA: Montana",
23964   "\t49\t-117\t-103",
23965   "\t48\t-117\t-103",
23966   "\t47\t-117\t-103",
23967   "\t46\t-117\t-103",
23968   "\t45\t-116\t-103",
23969   "\t44\t-115\t-103",
23970   "\t43\t-114\t-110",
23971   "USA: Nebraska",
23972   "\t44\t-105\t-97",
23973   "\t43\t-105\t-95",
23974   "\t42\t-105\t-94",
23975   "\t41\t-105\t-94",
23976   "\t40\t-105\t-94",
23977   "\t39\t-103\t-94",
23978   "USA: Nevada",
23979   "\t43\t-121\t-113",
23980   "\t42\t-121\t-113",
23981   "\t41\t-121\t-113",
23982   "\t40\t-121\t-113",
23983   "\t39\t-121\t-113",
23984   "\t38\t-121\t-113",
23985   "\t37\t-120\t-113",
23986   "\t36\t-119\t-113",
23987   "\t35\t-118\t-113",
23988   "\t34\t-116\t-113",
23989   "USA: New Hampshire",
23990   "\t46\t-72\t-70",
23991   "\t45\t-73\t-69",
23992   "\t44\t-73\t-69",
23993   "\t43\t-73\t-69",
23994   "\t42\t-73\t-69",
23995   "\t41\t-73\t-69",
23996   "USA: New Jersey",
23997   "\t42\t-76\t-73",
23998   "\t41\t-76\t-72",
23999   "\t40\t-76\t-72",
24000   "\t39\t-76\t-72",
24001   "\t38\t-76\t-73",
24002   "\t37\t-75\t-73",
24003   "USA: New Mexico",
24004   "\t38\t-110\t-102",
24005   "\t37\t-110\t-102",
24006   "\t36\t-110\t-102",
24007   "\t35\t-110\t-102",
24008   "\t34\t-110\t-102",
24009   "\t33\t-110\t-102",
24010   "\t32\t-110\t-102",
24011   "\t31\t-110\t-102",
24012   "\t30\t-110\t-105",
24013   "USA: New York",
24014   "\t46\t-75\t-72",
24015   "\t45\t-77\t-72",
24016   "\t44\t-80\t-72",
24017   "\t43\t-80\t-72",
24018   "\t42\t-80\t-70",
24019   "\t41\t-80\t-70",
24020   "\t40\t-79\t-70",
24021   "\t39\t-75\t-71",
24022   "USA: North Carolina",
24023   "\t37\t-83\t-74",
24024   "\t36\t-85\t-74",
24025   "\t35\t-85\t-74",
24026   "\t34\t-85\t-74",
24027   "\t33\t-85\t-75",
24028   "\t32\t-79\t-76",
24029   "USA: North Dakota",
24030   "\t49\t-105\t-96",
24031   "\t48\t-105\t-95",
24032   "\t47\t-105\t-95",
24033   "\t46\t-105\t-95",
24034   "\t45\t-105\t-95",
24035   "\t44\t-105\t-95",
24036   "USA: Ohio",
24037   "\t43\t-82\t-79",
24038   "\t42\t-85\t-79",
24039   "\t41\t-85\t-79",
24040   "\t40\t-85\t-79",
24041   "\t39\t-85\t-79",
24042   "\t38\t-85\t-79",
24043   "\t37\t-85\t-80",
24044   "USA: Oklahoma",
24045   "\t38\t-104\t-93",
24046   "\t37\t-104\t-93",
24047   "\t36\t-104\t-93",
24048   "\t35\t-104\t-93",
24049   "\t34\t-101\t-93",
24050   "\t33\t-101\t-93",
24051   "\t32\t-98\t-93",
24052   "USA: Oregon",
24053   "\t47\t-125\t-115",
24054   "\t46\t-125\t-115",
24055   "\t45\t-125\t-115",
24056   "\t44\t-125\t-115",
24057   "\t43\t-125\t-115",
24058   "\t42\t-125\t-115",
24059   "\t41\t-125\t-116",
24060   "USA: Pennsylvania",
24061   "\t43\t-81\t-77",
24062   "\t42\t-81\t-73",
24063   "\t41\t-81\t-73",
24064   "\t40\t-81\t-73",
24065   "\t39\t-81\t-73",
24066   "\t38\t-81\t-74",
24067   "USA: Rhode Island",
24068   "\t43\t-72\t-70",
24069   "\t42\t-72\t-70",
24070   "\t41\t-72\t-70",
24071   "\t40\t-72\t-70",
24072   "USA: South Carolina",
24073   "\t36\t-84\t-79",
24074   "\t35\t-84\t-77",
24075   "\t34\t-84\t-77",
24076   "\t33\t-84\t-77",
24077   "\t32\t-83\t-77",
24078   "\t31\t-82\t-78",
24079   "USA: South Dakota",
24080   "\t46\t-105\t-95",
24081   "\t45\t-105\t-95",
24082   "\t44\t-105\t-95",
24083   "\t43\t-105\t-95",
24084   "\t42\t-105\t-95",
24085   "\t41\t-99\t-95",
24086   "USA: Tennessee",
24087   "\t37\t-90\t-80",
24088   "\t36\t-91\t-80",
24089   "\t35\t-91\t-80",
24090   "\t34\t-91\t-81",
24091   "\t33\t-90\t-83",
24092   "USA: Texas",
24093   "\t37\t-104\t-99",
24094   "\t36\t-104\t-99",
24095   "\t35\t-104\t-97",
24096   "\t34\t-104\t-93",
24097   "\t33\t-107\t-93",
24098   "\t32\t-107\t-92",
24099   "\t31\t-107\t-92",
24100   "\t30\t-107\t-92",
24101   "\t29\t-106\t-92",
24102   "\t28\t-105\t-92",
24103   "\t27\t-104\t-94",
24104   "\t26\t-100\t-95",
24105   "\t25\t-100\t-96",
24106   "\t24\t-98\t-96",
24107   "USA: Utah",
24108   "\t43\t-115\t-110",
24109   "\t42\t-115\t-108",
24110   "\t41\t-115\t-108",
24111   "\t40\t-115\t-108",
24112   "\t39\t-115\t-108",
24113   "\t38\t-115\t-108",
24114   "\t37\t-115\t-108",
24115   "\t36\t-115\t-108",
24116   "USA: Vermont",
24117   "\t46\t-74\t-70",
24118   "\t45\t-74\t-70",
24119   "\t44\t-74\t-70",
24120   "\t43\t-74\t-70",
24121   "\t42\t-74\t-71",
24122   "\t41\t-74\t-71",
24123   "USA: Virginia",
24124   "\t40\t-79\t-76",
24125   "\t39\t-81\t-74",
24126   "\t38\t-83\t-74",
24127   "\t37\t-84\t-74",
24128   "\t36\t-84\t-74",
24129   "\t35\t-84\t-74",
24130   "USA: Washington",
24131   "\t49\t-125\t-116",
24132   "\t48\t-125\t-116",
24133   "\t47\t-125\t-115",
24134   "\t46\t-125\t-115",
24135   "\t45\t-125\t-115",
24136   "\t44\t-123\t-118",
24137   "USA: West Virginia",
24138   "\t41\t-81\t-79",
24139   "\t40\t-83\t-76",
24140   "\t39\t-83\t-76",
24141   "\t38\t-83\t-76",
24142   "\t37\t-83\t-77",
24143   "\t36\t-83\t-79",
24144   "USA: Wisconsin",
24145   "\t48\t-92\t-88",
24146   "\t47\t-93\t-87",
24147   "\t46\t-93\t-85",
24148   "\t45\t-93\t-85",
24149   "\t44\t-93\t-85",
24150   "\t43\t-93\t-85",
24151   "\t42\t-92\t-86",
24152   "\t41\t-92\t-86",
24153   "USA: Wyoming",
24154   "\t46\t-112\t-103",
24155   "\t45\t-112\t-103",
24156   "\t44\t-112\t-103",
24157   "\t43\t-112\t-103",
24158   "\t42\t-112\t-103",
24159   "\t41\t-112\t-103",
24160   "\t40\t-112\t-103",
24161   "Uzbekistan",
24162   "\t46\t55\t60",
24163   "\t45\t54\t62",
24164   "\t44\t54\t66",
24165   "\t43\t54\t67\t69\t72",
24166   "\t42\t54\t73",
24167   "\t41\t54\t74",
24168   "\t40\t54\t74",
24169   "\t39\t60\t74",
24170   "\t38\t61\t72",
24171   "\t37\t63\t69",
24172   "\t36\t65\t69",
24173   "Vanuatu",
24174   "\t-12\t165\t168",
24175   "\t-13\t165\t169",
24176   "\t-14\t165\t169",
24177   "\t-15\t165\t169",
24178   "\t-16\t165\t169",
24179   "\t-17\t166\t170",
24180   "\t-18\t167\t170",
24181   "\t-19\t167\t170",
24182   "\t-20\t168\t170",
24183   "\t-21\t168\t170",
24184   "Venezuela",
24185   "\t16\t-64\t-62",
24186   "\t15\t-64\t-62",
24187   "\t14\t-64\t-62",
24188   "\t13\t-71\t-66",
24189   "\t12\t-73\t-62",
24190   "\t11\t-73\t-60",
24191   "\t10\t-74\t-59",
24192   "\t9\t-74\t-58",
24193   "\t8\t-74\t-58",
24194   "\t7\t-73\t-58",
24195   "\t6\t-73\t-59",
24196   "\t5\t-72\t-59",
24197   "\t4\t-68\t-59",
24198   "\t3\t-68\t-59",
24199   "\t2\t-68\t-61",
24200   "\t1\t-68\t-62",
24201   "\t0\t-68\t-62",
24202   "\t-1\t-67\t-64",
24203   "Viet Nam",
24204   "\t24\t103\t106",
24205   "\t23\t101\t107",
24206   "\t22\t101\t108",
24207   "\t21\t101\t108",
24208   "\t20\t101\t108",
24209   "\t19\t102\t108",
24210   "\t18\t102\t108",
24211   "\t17\t103\t109",
24212   "\t16\t104\t109",
24213   "\t15\t105\t110",
24214   "\t14\t106\t110",
24215   "\t13\t105\t110",
24216   "\t12\t104\t110",
24217   "\t11\t102\t110",
24218   "\t10\t102\t110",
24219   "\t9\t102\t109",
24220   "\t8\t103\t107",
24221   "\t7\t103\t107",
24222   "Virgin Islands",
24223   "\t19\t-66\t-63",
24224   "\t18\t-66\t-63",
24225   "\t17\t-66\t-63",
24226   "\t16\t-65\t-63",
24227   "Wake Island",
24228   "\t20\t165\t167",
24229   "\t19\t165\t167",
24230   "\t18\t165\t167",
24231   "Wallis and Futuna",
24232   "\t-12\t-177\t-175",
24233   "\t-13\t-179\t-175",
24234   "\t-14\t-179\t-175",
24235   "\t-15\t-179\t-177",
24236   "West Bank",
24237   "\t33\t33\t36",
24238   "\t32\t33\t36",
24239   "\t31\t33\t36",
24240   "\t30\t33\t36",
24241   "Western Sahara",
24242   "\t28\t-11\t-7",
24243   "\t27\t-13\t-7",
24244   "\t26\t-13\t-7",
24245   "\t25\t-14\t-7",
24246   "\t24\t-15\t-7",
24247   "\t23\t-15\t-11",
24248   "\t22\t-18\t-11",
24249   "\t21\t-18\t-12",
24250   "\t20\t-18\t-12",
24251   "\t19\t-18\t-16",
24252   "Yemen",
24253   "\t19\t47\t53",
24254   "\t18\t42\t53",
24255   "\t17\t41\t54",
24256   "\t16\t41\t54",
24257   "\t15\t41\t54",
24258   "\t14\t41\t53",
24259   "\t13\t41\t55",
24260   "\t12\t41\t49\t51\t55",
24261   "\t11\t42\t46\t51\t55",
24262   "Zambia",
24263   "\t-7\t27\t32",
24264   "\t-8\t27\t34",
24265   "\t-9\t22\t25\t27\t34",
24266   "\t-10\t22\t34",
24267   "\t-11\t22\t34",
24268   "\t-12\t20\t34",
24269   "\t-13\t20\t34",
24270   "\t-14\t20\t34",
24271   "\t-15\t20\t34",
24272   "\t-16\t20\t31",
24273   "\t-17\t20\t29",
24274   "\t-18\t21\t28",
24275   "\t-19\t24\t27",
24276   "Zimbabwe",
24277   "\t-14\t27\t32",
24278   "\t-15\t26\t33",
24279   "\t-16\t24\t34",
24280   "\t-17\t24\t34",
24281   "\t-18\t24\t34",
24282   "\t-19\t24\t34",
24283   "\t-20\t24\t34",
24284   "\t-21\t25\t34",
24285   "\t-22\t26\t33",
24286   "\t-23\t28\t32",
24287   NULL
24288 };
24289 
24290 extern CharPtr water_onedegree [];
24291 CharPtr water_onedegree [] = {
24292   "1",
24293   "Adriatic Sea",
24294   "\t46\t11\t15",
24295   "\t45\t11\t16",
24296   "\t44\t11\t18",
24297   "\t43\t11\t20",
24298   "\t42\t11\t20",
24299   "\t41\t12\t20",
24300   "\t40\t14\t20",
24301   "\t39\t16\t20",
24302   "\t38\t17\t20",
24303   "Aegean Sea",
24304   "\t41\t21\t27",
24305   "\t40\t21\t27",
24306   "\t39\t21\t28",
24307   "\t38\t21\t29",
24308   "\t37\t21\t29",
24309   "\t36\t23\t29",
24310   "\t35\t23\t29",
24311   "Albemarle Sound",
24312   "\t37\t-77\t-74",
24313   "\t36\t-77\t-74",
24314   "\t35\t-77\t-74",
24315   "\t34\t-77\t-74",
24316   "Alboran Sea",
24317   "\t37\t-6\t-1",
24318   "\t36\t-6\t0",
24319   "\t35\t-6\t0",
24320   "\t34\t-6\t0",
24321   "Amundsen Gulf",
24322   "\t72\t-126\t-117",
24323   "\t71\t-128\t-116",
24324   "\t70\t-128\t-116",
24325   "\t69\t-128\t-116",
24326   "\t68\t-127\t-117",
24327   "Amundsen Sea",
24328   "\t-71\t-108\t-101",
24329   "\t-72\t-115\t-97",
24330   "\t-73\t-115\t-97",
24331   "\t-74\t-115\t-97",
24332   "\t-75\t-115\t-97",
24333   "\t-76\t-112\t-97",
24334   "Andaman Sea",
24335   "\t17\t93\t96",
24336   "\t16\t92\t97",
24337   "\t15\t91\t99",
24338   "\t14\t91\t99",
24339   "\t13\t91\t99",
24340   "\t12\t91\t99",
24341   "\t11\t91\t99",
24342   "\t10\t91\t99",
24343   "\t9\t91\t99",
24344   "\t8\t91\t99",
24345   "\t7\t91\t99",
24346   "\t6\t92\t99",
24347   "\t5\t92\t98",
24348   "\t4\t94\t96",
24349   "Antongila Bay",
24350   "\t-14\t48\t51",
24351   "\t-15\t48\t51",
24352   "\t-16\t48\t51",
24353   "\t-17\t48\t50",
24354   "Arabian Sea",
24355   "\t26\t60\t67",
24356   "\t25\t59\t68",
24357   "\t24\t59\t69",
24358   "\t23\t58\t70",
24359   "\t22\t57\t71",
24360   "\t21\t57\t72",
24361   "\t20\t56\t74",
24362   "\t19\t55\t74",
24363   "\t18\t53\t74",
24364   "\t17\t51\t74",
24365   "\t16\t50\t74",
24366   "\t15\t50\t75",
24367   "\t14\t50\t75",
24368   "\t13\t50\t75",
24369   "\t12\t50\t74",
24370   "\t11\t50\t72",
24371   "\t10\t50\t72",
24372   "\t9\t50\t72",
24373   "\t8\t52\t72",
24374   "\t7\t54\t73",
24375   "\t6\t56\t73",
24376   "\t5\t58\t73",
24377   "\t4\t60\t73",
24378   "\t3\t63\t73",
24379   "\t2\t65\t73",
24380   "\t1\t67\t74",
24381   "\t0\t69\t74",
24382   "\t-1\t71\t74",
24383   "Arafura Sea",
24384   "\t-2\t132\t135",
24385   "\t-3\t132\t138",
24386   "\t-4\t131\t139",
24387   "\t-5\t131\t139",
24388   "\t-6\t130\t141",
24389   "\t-7\t129\t141",
24390   "\t-8\t129\t142",
24391   "\t-9\t129\t143",
24392   "\t-10\t129\t143",
24393   "\t-11\t130\t143",
24394   "\t-12\t130\t143",
24395   "\t-13\t133\t142",
24396   "Aral Sea",
24397   "\t47\t58\t62",
24398   "\t46\t57\t62",
24399   "\t45\t57\t62",
24400   "\t44\t57\t61",
24401   "\t43\t57\t61",
24402   "Arctic Ocean",
24403   "\t90\t-180\t180",
24404   "\t89\t-180\t180",
24405   "\t88\t-180\t180",
24406   "\t87\t-180\t180",
24407   "\t86\t-180\t180",
24408   "\t85\t-180\t180",
24409   "\t84\t-180\t180",
24410   "\t83\t-180\t180",
24411   "\t82\t-180\t180",
24412   "\t81\t-180\t-69\t-18\t180",
24413   "\t80\t-180\t-75\t-4\t180",
24414   "\t79\t-180\t-75\t10\t50\t100\t180",
24415   "\t78\t-180\t-99\t-88\t-79\t108\t180",
24416   "\t77\t-180\t-107\t117\t180",
24417   "\t76\t-180\t-112\t125\t180",
24418   "\t75\t-180\t-119\t133\t148\t157\t180",
24419   "\t74\t-180\t-125\t161\t180",
24420   "\t73\t-180\t-132\t165\t180",
24421   "\t72\t-180\t-138\t169\t180",
24422   "\t71\t-180\t-145\t173\t180",
24423   "\t70\t-180\t-152\t177\t180",
24424   "Atlantic Ocean",
24425   "\t69\t-33\t-29",
24426   "\t68\t-34\t-27",
24427   "\t67\t-39\t-26",
24428   "\t66\t-42\t-24",
24429   "\t65\t-42\t-20\t-17\t-11",
24430   "\t64\t-43\t-8",
24431   "\t63\t-43\t-6",
24432   "\t62\t-43\t-3",
24433   "\t61\t-44\t0",
24434   "\t60\t-44\t0",
24435   "\t59\t-45\t0",
24436   "\t58\t-45\t0",
24437   "\t57\t-46\t-1",
24438   "\t56\t-47\t-5",
24439   "\t55\t-48\t-6",
24440   "\t54\t-48\t-7",
24441   "\t53\t-49\t-5",
24442   "\t52\t-50\t-5",
24443   "\t51\t-51\t-4",
24444   "\t50\t-51\t-4",
24445   "\t49\t-52\t-4",
24446   "\t48\t-56\t-4",
24447   "\t47\t-60\t-4",
24448   "\t46\t-62\t-5",
24449   "\t45\t-65\t-5",
24450   "\t44\t-66\t-6",
24451   "\t43\t-68\t-6",
24452   "\t42\t-74\t-6",
24453   "\t41\t-75\t-7",
24454   "\t40\t-75\t-7",
24455   "\t39\t-76\t-7",
24456   "\t38\t-76\t-5",
24457   "\t37\t-77\t-4",
24458   "\t36\t-77\t-4",
24459   "\t35\t-78\t-4",
24460   "\t34\t-80\t-4",
24461   "\t33\t-81\t-5",
24462   "\t32\t-82\t-5",
24463   "\t31\t-82\t-7",
24464   "\t30\t-82\t-8",
24465   "\t29\t-82\t-8",
24466   "\t28\t-82\t-8",
24467   "\t27\t-81\t-9",
24468   "\t26\t-81\t-11",
24469   "\t25\t-81\t-12",
24470   "\t24\t-81\t-13",
24471   "\t23\t-81\t-13",
24472   "\t22\t-81\t-14",
24473   "\t21\t-81\t-15",
24474   "\t20\t-78\t-15",
24475   "\t19\t-76\t-15",
24476   "\t18\t-74\t-15",
24477   "\t17\t-69\t-15",
24478   "\t16\t-62\t-15",
24479   "\t15\t-61\t-15",
24480   "\t14\t-61\t-14",
24481   "\t13\t-60\t-14",
24482   "\t12\t-61\t-14",
24483   "\t11\t-61\t-13",
24484   "\t10\t-62\t-12",
24485   "\t9\t-62\t-11",
24486   "\t8\t-62\t-10",
24487   "\t7\t-61\t-9",
24488   "\t6\t-59\t-8",
24489   "\t5\t-59\t-5",
24490   "\t4\t-58\t-2",
24491   "\t3\t-53\t2",
24492   "\t2\t-52\t5",
24493   "\t1\t-51\t7",
24494   "\t0\t-51\t7",
24495   "\t-1\t-51\t7",
24496   "Atlantic Ocean",
24497   "\t1\t-50\t9",
24498   "\t0\t-50\t10",
24499   "\t-1\t-50\t11",
24500   "\t-2\t-47\t12",
24501   "\t-3\t-44\t13",
24502   "\t-4\t-40\t14",
24503   "\t-5\t-39\t14",
24504   "\t-6\t-37\t14",
24505   "\t-7\t-36\t14",
24506   "\t-8\t-36\t14",
24507   "\t-9\t-37\t14",
24508   "\t-10\t-38\t14",
24509   "\t-11\t-39\t14",
24510   "\t-12\t-40\t14",
24511   "\t-13\t-40\t14",
24512   "\t-14\t-40\t13",
24513   "\t-15\t-40\t13",
24514   "\t-16\t-40\t13",
24515   "\t-17\t-40\t13",
24516   "\t-18\t-41\t13",
24517   "\t-19\t-41\t14",
24518   "\t-20\t-42\t14",
24519   "\t-21\t-45\t15",
24520   "\t-22\t-46\t15",
24521   "\t-23\t-48\t15",
24522   "\t-24\t-49\t15",
24523   "\t-25\t-49\t16",
24524   "\t-26\t-49\t16",
24525   "\t-27\t-50\t17",
24526   "\t-28\t-51\t17",
24527   "\t-29\t-51\t18",
24528   "\t-30\t-52\t19",
24529   "\t-31\t-53\t19",
24530   "\t-32\t-54\t19",
24531   "\t-33\t-55\t20",
24532   "\t-34\t-56\t20",
24533   "\t-35\t-57\t20",
24534   "\t-36\t-58\t20",
24535   "\t-37\t-62\t20",
24536   "\t-38\t-63\t20",
24537   "\t-39\t-63\t20",
24538   "\t-40\t-64\t20",
24539   "\t-41\t-66\t20",
24540   "\t-42\t-66\t20",
24541   "\t-43\t-66\t20",
24542   "\t-44\t-66\t20",
24543   "\t-45\t-66\t20",
24544   "\t-46\t-67\t20",
24545   "\t-47\t-68\t20",
24546   "\t-48\t-68\t20",
24547   "\t-49\t-68\t20",
24548   "\t-50\t-69\t20",
24549   "\t-51\t-69\t20",
24550   "\t-52\t-69\t20",
24551   "\t-53\t-70\t20",
24552   "\t-54\t-70\t20",
24553   "\t-55\t-70\t20",
24554   "\t-56\t-69\t20",
24555   "\t-57\t-69\t20",
24556   "\t-58\t-69\t20",
24557   "\t-59\t-69\t20",
24558   "\t-60\t-69\t20",
24559   "\t-61\t-69\t20",
24560   "Bab el Mandeb",
24561   "\t14\t42\t44",
24562   "\t13\t42\t44",
24563   "\t12\t42\t44",
24564   "\t11\t42\t44",
24565   "Baffin Bay",
24566   "\t79\t-77\t-71",
24567   "\t78\t-83\t-70",
24568   "\t77\t-83\t-66",
24569   "\t76\t-83\t-61",
24570   "\t75\t-81\t-55",
24571   "\t74\t-81\t-54",
24572   "\t73\t-81\t-53",
24573   "\t72\t-79\t-53",
24574   "\t71\t-78\t-53",
24575   "\t70\t-76\t-53",
24576   "\t69\t-73\t-53",
24577   "\t68\t-70\t-53",
24578   "Bahia Blanca",
24579   "\t-37\t-63\t-60",
24580   "\t-38\t-63\t-60",
24581   "\t-39\t-63\t-60",
24582   "\t-40\t-63\t-60",
24583   "Bahia de Campeche",
24584   "\t22\t-94\t-89",
24585   "\t21\t-98\t-89",
24586   "\t20\t-98\t-89",
24587   "\t19\t-98\t-89",
24588   "\t18\t-97\t-89",
24589   "\t17\t-96\t-90",
24590   "Bahia Grande",
24591   "\t-48\t-69\t-66",
24592   "\t-49\t-70\t-66",
24593   "\t-50\t-70\t-66",
24594   "\t-51\t-70\t-66",
24595   "\t-52\t-70\t-67",
24596   "\t-53\t-69\t-67",
24597   "Bahia Inutil",
24598   "\t-52\t-71\t-68",
24599   "\t-53\t-71\t-68",
24600   "\t-54\t-71\t-68",
24601   "\t-55\t-71\t-68",
24602   "Baia de Maputo",
24603   "\t-24\t31\t33",
24604   "\t-25\t31\t33",
24605   "\t-26\t31\t33",
24606   "\t-27\t31\t33",
24607   "Baia de Marajo",
24608   "\t1\t-49\t-47",
24609   "\t0\t-50\t-47",
24610   "\t-1\t-50\t-47",
24611   "\t-2\t-50\t-47",
24612   "\t-3\t-50\t-48",
24613   "Baia de Sao Marcos",
24614   "\t0\t-45\t-43",
24615   "\t-1\t-45\t-42",
24616   "\t-2\t-45\t-42",
24617   "\t-3\t-45\t-42",
24618   "\t-4\t-45\t-43",
24619   "Baird Inlet",
24620   "\t61\t-165\t-162",
24621   "\t60\t-165\t-162",
24622   "\t59\t-165\t-162",
24623   "Balearic Sea",
24624   "\t42\t0\t4",
24625   "\t41\t-1\t5",
24626   "\t40\t-1\t5",
24627   "\t39\t-1\t5",
24628   "\t38\t-1\t5",
24629   "\t37\t-1\t3",
24630   "Bali Sea",
24631   "\t-5\t114\t117",
24632   "\t-6\t113\t118",
24633   "\t-7\t113\t118",
24634   "\t-8\t113\t118",
24635   "\t-9\t113\t118",
24636   "\t-10\t115\t117",
24637   "Baltic Sea",
24638   "\t60\t16\t24",
24639   "\t59\t15\t24",
24640   "\t58\t15\t24",
24641   "\t57\t13\t23",
24642   "\t56\t11\t23",
24643   "\t55\t11\t22",
24644   "\t54\t11\t22",
24645   "\t53\t11\t21",
24646   "\t52\t13\t15",
24647   "Banda Sea",
24648   "\t1\t121\t124",
24649   "\t0\t120\t126",
24650   "\t-1\t119\t129",
24651   "\t-2\t119\t131",
24652   "\t-3\t119\t133",
24653   "\t-4\t119\t134",
24654   "\t-5\t119\t134",
24655   "\t-6\t119\t134",
24656   "\t-7\t119\t133",
24657   "\t-8\t119\t132",
24658   "\t-9\t121\t132",
24659   "Barents Sea",
24660   "\t82\t49\t66",
24661   "\t81\t16\t19\t26\t66",
24662   "\t80\t16\t67",
24663   "\t79\t16\t67",
24664   "\t78\t16\t68",
24665   "\t77\t16\t69",
24666   "\t76\t16\t69",
24667   "\t75\t16\t69",
24668   "\t74\t18\t61",
24669   "\t73\t20\t57",
24670   "\t72\t22\t55",
24671   "\t71\t24\t59",
24672   "\t70\t26\t61",
24673   "\t69\t26\t61",
24674   "\t68\t28\t61",
24675   "\t67\t36\t61",
24676   "\t66\t43\t50",
24677   "\t65\t44\t48",
24678   "Bass Strait",
24679   "\t-36\t143\t150",
24680   "\t-37\t142\t150",
24681   "\t-38\t142\t150",
24682   "\t-39\t142\t149",
24683   "\t-40\t142\t149",
24684   "\t-41\t142\t149",
24685   "\t-42\t144\t148",
24686   "Bathurst Inlet",
24687   "\t68\t-109\t-106",
24688   "\t67\t-109\t-106",
24689   "\t66\t-109\t-106",
24690   "\t65\t-109\t-106",
24691   "Bay of Bengal",
24692   "\t24\t89\t91",
24693   "\t23\t86\t92",
24694   "\t22\t85\t93",
24695   "\t21\t85\t94",
24696   "\t20\t83\t95",
24697   "\t19\t82\t95",
24698   "\t18\t81\t95",
24699   "\t17\t80\t95",
24700   "\t16\t79\t95",
24701   "\t15\t79\t95",
24702   "\t14\t79\t94",
24703   "\t13\t78\t93",
24704   "\t12\t78\t93",
24705   "\t11\t78\t93",
24706   "\t10\t78\t93",
24707   "\t9\t78\t93",
24708   "\t8\t79\t94",
24709   "\t7\t79\t95",
24710   "\t6\t80\t96",
24711   "\t5\t84\t96",
24712   "\t4\t91\t96",
24713   "Bay of Biscay",
24714   "\t49\t-6\t-3",
24715   "\t48\t-7\t0",
24716   "\t47\t-7\t0",
24717   "\t46\t-8\t1",
24718   "\t45\t-8\t1",
24719   "\t44\t-9\t1",
24720   "\t43\t-9\t0",
24721   "\t42\t-9\t0",
24722   "Bay of Fundy",
24723   "\t46\t-68\t-62",
24724   "\t45\t-68\t-62",
24725   "\t44\t-68\t-62",
24726   "\t43\t-68\t-64",
24727   "Bay of Plenty",
24728   "\t-35\t174\t177",
24729   "\t-36\t174\t179",
24730   "\t-37\t174\t179",
24731   "\t-38\t174\t179",
24732   "Beaufort Sea",
24733   "\t77\t-126\t-121",
24734   "\t76\t-133\t-121",
24735   "\t75\t-139\t-121",
24736   "\t74\t-146\t-122",
24737   "\t73\t-153\t-122",
24738   "\t72\t-157\t-122",
24739   "\t71\t-157\t-123",
24740   "\t70\t-157\t-124",
24741   "\t69\t-157\t-125",
24742   "\t68\t-145\t-127",
24743   "Bellingshausen Sea",
24744   "\t-67\t-74\t-70",
24745   "\t-68\t-80\t-70",
24746   "\t-69\t-86\t-68",
24747   "\t-70\t-92\t-68",
24748   "\t-71\t-96\t-68",
24749   "\t-72\t-96\t-68",
24750   "\t-73\t-96\t-73",
24751   "\t-74\t-96\t-73",
24752   "Bering Sea",
24753   "\t67\t-171\t-168",
24754   "\t66\t-173\t-165",
24755   "\t65\t-175\t-163",
24756   "\t64\t-177\t-163",
24757   "\t63\t-179\t-163\t174\t180",
24758   "\t62\t-180\t-163\t171\t180",
24759   "\t61\t-180\t-160\t165\t180",
24760   "\t60\t-180\t-160\t165\t180",
24761   "\t59\t-180\t-160\t163\t180",
24762   "\t58\t-180\t-160\t161\t180",
24763   "\t57\t-180\t-160\t161\t180",
24764   "\t56\t-180\t-160\t161\t180",
24765   "\t55\t-180\t-160\t161\t180",
24766   "\t54\t-180\t-160\t163\t180",
24767   "\t53\t-180\t-161\t165\t180",
24768   "\t52\t-180\t-163\t167\t180",
24769   "\t51\t-180\t-166\t169\t180",
24770   "\t50\t-180\t-171\t171\t180",
24771   "\t49\t178\t180",
24772   "Bering Strait",
24773   "\t67\t-171\t-168",
24774   "\t66\t-171\t-166",
24775   "\t65\t-171\t-166",
24776   "\t64\t-171\t-166",
24777   "Bight of Benin",
24778   "\t7\t0\t5",
24779   "\t6\t-1\t6",
24780   "\t5\t-1\t6",
24781   "\t4\t-1\t6",
24782   "\t3\t2\t6",
24783   "Bight of Biafra",
24784   "\t5\t5\t10",
24785   "\t4\t5\t10",
24786   "\t3\t5\t10",
24787   "\t2\t7\t10",
24788   "\t1\t8\t10",
24789   "Bismarck Sea",
24790   "\t0\t141\t148",
24791   "\t-1\t140\t152",
24792   "\t-2\t140\t153",
24793   "\t-3\t140\t153",
24794   "\t-4\t141\t153",
24795   "\t-5\t143\t153",
24796   "\t-6\t144\t152",
24797   "Black Sea",
24798   "\t48\t30\t32",
24799   "\t47\t29\t34",
24800   "\t46\t28\t37",
24801   "\t45\t27\t39",
24802   "\t44\t26\t41",
24803   "\t43\t26\t42",
24804   "\t42\t26\t42",
24805   "\t41\t26\t42",
24806   "\t40\t26\t42",
24807   "\t39\t37\t41",
24808   "Bo Hai",
24809   "\t41\t119\t123",
24810   "\t40\t116\t123",
24811   "\t39\t116\t123",
24812   "\t38\t116\t122",
24813   "\t37\t116\t122",
24814   "\t36\t117\t121",
24815   "Boca Grande",
24816   "\t10\t-62\t-59",
24817   "\t9\t-62\t-59",
24818   "\t8\t-62\t-59",
24819   "\t7\t-62\t-59",
24820   "Bohol Sea",
24821   "\t11\t122\t126",
24822   "\t10\t122\t126",
24823   "\t9\t122\t126",
24824   "\t8\t122\t126",
24825   "\t7\t122\t126",
24826   "Boknafjorden",
24827   "\t60\t4\t7",
24828   "\t59\t4\t7",
24829   "\t58\t4\t7",
24830   "\t57\t4\t7",
24831   "Bosporus",
24832   "\t42\t27\t30",
24833   "\t41\t27\t30",
24834   "\t40\t27\t30",
24835   "Bransfield Strait",
24836   "\t-60\t-58\t-53",
24837   "\t-61\t-63\t-53",
24838   "\t-62\t-63\t-53",
24839   "\t-63\t-64\t-53",
24840   "\t-64\t-64\t-54",
24841   "\t-65\t-64\t-59",
24842   "\t-66\t-64\t-62",
24843   "Bristol Bay",
24844   "\t60\t-161\t-155",
24845   "\t59\t-163\t-155",
24846   "\t58\t-163\t-155",
24847   "\t57\t-163\t-155",
24848   "\t56\t-163\t-156",
24849   "\t55\t-162\t-157",
24850   "\t54\t-162\t-159",
24851   "Bristol Channel",
24852   "\t52\t-7\t-1",
24853   "\t51\t-7\t-1",
24854   "\t50\t-7\t-1",
24855   "\t49\t-6\t-3",
24856   "Caribbean Sea",
24857   "\t23\t-84\t-79",
24858   "\t22\t-88\t-77",
24859   "\t21\t-88\t-73",
24860   "\t20\t-88\t-71",
24861   "\t19\t-89\t-60",
24862   "\t18\t-89\t-60",
24863   "\t17\t-89\t-59",
24864   "\t16\t-89\t-59",
24865   "\t15\t-88\t-58",
24866   "\t14\t-87\t-58",
24867   "\t13\t-84\t-58",
24868   "\t12\t-84\t-58",
24869   "\t11\t-84\t-58",
24870   "\t10\t-84\t-59",
24871   "\t9\t-84\t-59",
24872   "\t8\t-84\t-74\t-63\t-59",
24873   "\t7\t-83\t-75",
24874   "Caspian Sea",
24875   "\t48\t49\t52",
24876   "\t47\t47\t54",
24877   "\t46\t46\t54",
24878   "\t45\t45\t54",
24879   "\t44\t45\t54",
24880   "\t43\t45\t53",
24881   "\t42\t46\t53",
24882   "\t41\t46\t53",
24883   "\t40\t47\t54",
24884   "\t39\t47\t54",
24885   "\t38\t47\t54",
24886   "\t37\t47\t55",
24887   "\t36\t47\t55",
24888   "\t35\t49\t55",
24889   "Celebes Sea",
24890   "\t8\t121\t125",
24891   "\t7\t120\t126",
24892   "\t6\t117\t126",
24893   "\t5\t116\t126",
24894   "\t4\t116\t126",
24895   "\t3\t116\t126",
24896   "\t2\t116\t126",
24897   "\t1\t116\t126",
24898   "\t0\t116\t126",
24899   "\t-1\t117\t124",
24900   "Ceram Sea",
24901   "\t0\t124\t133",
24902   "\t-1\t124\t134",
24903   "\t-2\t124\t134",
24904   "\t-3\t124\t134",
24905   "\t-4\t124\t126\t129\t134",
24906   "\t-5\t130\t134",
24907   "\t-6\t132\t134",
24908   "Chaun Bay",
24909   "\t70\t167\t171",
24910   "\t69\t167\t171",
24911   "\t68\t167\t171",
24912   "\t67\t168\t171",
24913   "Chesapeake Bay",
24914   "\t40\t-77\t-74",
24915   "\t39\t-78\t-74",
24916   "\t38\t-78\t-74",
24917   "\t37\t-78\t-74",
24918   "\t36\t-78\t-74",
24919   "\t35\t-77\t-75",
24920   "Chukchi Sea",
24921   "\t72\t-179\t-155\t177\t179",
24922   "\t71\t-180\t-155\t175\t180",
24923   "\t70\t-180\t-155\t174\t180",
24924   "\t69\t-180\t-156\t174\t180",
24925   "\t68\t-180\t-161\t174\t180",
24926   "\t67\t-180\t-162\t179\t180",
24927   "\t66\t-176\t-162",
24928   "\t65\t-175\t-163",
24929   "\t64\t-169\t-165",
24930   "Cook Inlet",
24931   "\t62\t-152\t-148",
24932   "\t61\t-154\t-148",
24933   "\t60\t-155\t-148",
24934   "\t59\t-155\t-148",
24935   "\t58\t-155\t-150",
24936   "\t57\t-154\t-151",
24937   "Cook Strait",
24938   "\t-39\t173\t176",
24939   "\t-40\t173\t176",
24940   "\t-41\t173\t176",
24941   "\t-42\t173\t176",
24942   "Coral Sea",
24943   "\t-7\t142\t147",
24944   "\t-8\t141\t148\t164\t168",
24945   "\t-9\t141\t153\t161\t168",
24946   "\t-10\t141\t168",
24947   "\t-11\t141\t168",
24948   "\t-12\t141\t168",
24949   "\t-13\t142\t169",
24950   "\t-14\t142\t169",
24951   "\t-15\t142\t169",
24952   "\t-16\t144\t169",
24953   "\t-17\t144\t170",
24954   "\t-18\t144\t170",
24955   "\t-19\t145\t170",
24956   "\t-20\t145\t170",
24957   "\t-21\t147\t170",
24958   "\t-22\t148\t169",
24959   "\t-23\t148\t168",
24960   "\t-24\t149\t167",
24961   "\t-25\t150\t166",
24962   "\t-26\t151\t165",
24963   "\t-27\t152\t164",
24964   "\t-28\t152\t162",
24965   "\t-29\t152\t161",
24966   "\t-30\t152\t160",
24967   "Cordova Bay",
24968   "\t56\t-134\t-131",
24969   "\t55\t-134\t-131",
24970   "\t54\t-134\t-131",
24971   "\t53\t-133\t-131",
24972   "Cumberland Sound",
24973   "\t67\t-69\t-63",
24974   "\t66\t-69\t-62",
24975   "\t65\t-69\t-62",
24976   "\t64\t-69\t-62",
24977   "\t63\t-67\t-62",
24978   "\t62\t-65\t-63",
24979   "Dardanelles",
24980   "\t41\t25\t27",
24981   "\t40\t25\t27",
24982   "\t39\t25\t27",
24983   "\t38\t25\t27",
24984   "Darnley Bay",
24985   "\t70\t-125\t-122",
24986   "\t69\t-125\t-122",
24987   "\t68\t-125\t-122",
24988   "Davao Gulf",
24989   "\t8\t124\t126",
24990   "\t7\t124\t127",
24991   "\t6\t124\t127",
24992   "\t5\t124\t127",
24993   "\t4\t124\t126",
24994   "Davis Sea",
24995   "\t-62\t90\t104",
24996   "\t-63\t86\t111",
24997   "\t-64\t84\t113",
24998   "\t-65\t83\t113",
24999   "\t-66\t82\t113",
25000   "\t-67\t82\t111",
25001   "\t-68\t82\t87",
25002   "Davis Strait",
25003   "\t70\t-70\t-52",
25004   "\t69\t-70\t-50",
25005   "\t68\t-70\t-49",
25006   "\t67\t-70\t-49",
25007   "\t66\t-67\t-49",
25008   "\t65\t-64\t-49",
25009   "\t64\t-66\t-48",
25010   "\t63\t-66\t-47",
25011   "\t62\t-66\t-44",
25012   "\t61\t-66\t-43",
25013   "\t60\t-65\t-43",
25014   "\t59\t-65\t-43",
25015   "Delaware Bay",
25016   "\t40\t-76\t-73",
25017   "\t39\t-76\t-73",
25018   "\t38\t-76\t-73",
25019   "\t37\t-76\t-73",
25020   "Denmark Strait",
25021   "\t71\t-23\t-21",
25022   "\t70\t-26\t-19",
25023   "\t69\t-31\t-18",
25024   "\t68\t-31\t-16",
25025   "\t67\t-31\t-15",
25026   "\t66\t-30\t-15",
25027   "\t65\t-28\t-15",
25028   "\t64\t-27\t-16",
25029   "\t63\t-25\t-22",
25030   "Disko Bay",
25031   "\t71\t-55\t-49",
25032   "\t70\t-55\t-49",
25033   "\t69\t-55\t-49",
25034   "\t68\t-54\t-49",
25035   "\t67\t-54\t-49",
25036   "Dixon Entrance",
25037   "\t55\t-134\t-130",
25038   "\t54\t-134\t-130",
25039   "\t53\t-134\t-130",
25040   "Dmitriy Laptev Strait",
25041   "\t74\t138\t144",
25042   "\t73\t138\t144",
25043   "\t72\t138\t144",
25044   "\t71\t139\t144",
25045   "Drake Passage",
25046   "\t-53\t-67\t-62",
25047   "\t-54\t-69\t-61",
25048   "\t-55\t-69\t-60",
25049   "\t-56\t-69\t-58",
25050   "\t-57\t-69\t-57",
25051   "\t-58\t-69\t-56",
25052   "\t-59\t-69\t-55",
25053   "\t-60\t-69\t-54",
25054   "\t-61\t-69\t-54",
25055   "\t-62\t-69\t-54",
25056   "\t-63\t-69\t-57",
25057   "\t-64\t-69\t-61",
25058   "\t-65\t-69\t-62",
25059   "\t-66\t-69\t-64",
25060   "\t-67\t-69\t-65",
25061   "East China Sea",
25062   "\t34\t124\t127\t129\t131",
25063   "\t33\t122\t131",
25064   "\t32\t120\t131",
25065   "\t31\t120\t131",
25066   "\t30\t120\t131",
25067   "\t29\t119\t131",
25068   "\t28\t119\t131",
25069   "\t27\t118\t130",
25070   "\t26\t118\t129",
25071   "\t25\t118\t129",
25072   "\t24\t118\t128",
25073   "\t23\t120\t127",
25074   "\t22\t122\t125",
25075   "East Korea Bay",
25076   "\t41\t127\t129",
25077   "\t40\t126\t129",
25078   "\t39\t126\t129",
25079   "\t38\t126\t129",
25080   "\t37\t126\t129",
25081   "East Siberian Sea",
25082   "\t78\t147\t158",
25083   "\t77\t137\t162",
25084   "\t76\t137\t166",
25085   "\t75\t137\t170",
25086   "\t74\t138\t174",
25087   "\t73\t138\t178",
25088   "\t72\t138\t180",
25089   "\t71\t142\t180",
25090   "\t70\t147\t180",
25091   "\t69\t150\t155\t157\t178",
25092   "\t68\t158\t176",
25093   "\t67\t159\t162",
25094   "Eclipse Sound",
25095   "\t74\t-81\t-79",
25096   "\t73\t-82\t-76",
25097   "\t72\t-82\t-76",
25098   "\t71\t-82\t-76",
25099   "\t70\t-81\t-77",
25100   "English Channel",
25101   "\t52\t0\t2",
25102   "\t51\t-6\t2",
25103   "\t50\t-7\t2",
25104   "\t49\t-7\t2",
25105   "\t48\t-7\t2",
25106   "\t47\t-6\t0",
25107   "Eskimo Lakes",
25108   "\t70\t-134\t-130",
25109   "\t69\t-134\t-130",
25110   "\t68\t-134\t-130",
25111   "\t67\t-134\t-131",
25112   "Estrecho de Magellanes",
25113   "\t-51\t-75\t-67",
25114   "\t-52\t-75\t-67",
25115   "\t-53\t-75\t-67",
25116   "\t-54\t-74\t-69",
25117   "\t-55\t-72\t-69",
25118   "Finger Lakes",
25119   "\t43\t-78\t-75",
25120   "\t42\t-78\t-75",
25121   "\t41\t-78\t-75",
25122   "Flores Sea",
25123   "\t-4\t118\t121",
25124   "\t-5\t117\t121",
25125   "\t-6\t116\t122",
25126   "\t-7\t116\t123",
25127   "\t-8\t116\t123",
25128   "\t-9\t116\t123",
25129   "Foxe Basin",
25130   "\t71\t-80\t-76",
25131   "\t70\t-83\t-73",
25132   "\t69\t-83\t-72",
25133   "\t68\t-85\t-71",
25134   "\t67\t-87\t-71",
25135   "\t66\t-87\t-71",
25136   "\t65\t-87\t-71",
25137   "\t64\t-86\t-72",
25138   "\t63\t-84\t-74",
25139   "\t62\t-81\t-78",
25140   "Franklin Bay",
25141   "\t70\t-126\t-124",
25142   "\t69\t-126\t-124",
25143   "\t68\t-126\t-124",
25144   "Frobisher Bay",
25145   "\t64\t-69\t-64",
25146   "\t63\t-69\t-64",
25147   "\t62\t-69\t-64",
25148   "\t61\t-68\t-64",
25149   "Fury and Hecla Strait",
25150   "\t71\t-86\t-82",
25151   "\t70\t-86\t-81",
25152   "\t69\t-86\t-81",
25153   "\t68\t-86\t-81",
25154   "Garabogaz Bay",
25155   "\t43\t52\t54",
25156   "\t42\t51\t55",
25157   "\t41\t51\t55",
25158   "\t40\t51\t55",
25159   "\t39\t51\t55",
25160   "Geographe Bay",
25161   "\t-29\t114\t116",
25162   "\t-30\t114\t116",
25163   "\t-31\t114\t116",
25164   "\t-32\t114\t116",
25165   "\t-33\t114\t116",
25166   "\t-34\t114\t116",
25167   "George VI Sound",
25168   "\t-68\t-70\t-67",
25169   "\t-69\t-70\t-66",
25170   "\t-70\t-74\t-65",
25171   "\t-71\t-75\t-65",
25172   "\t-72\t-75\t-65",
25173   "\t-73\t-75\t-65",
25174   "\t-74\t-75\t-67",
25175   "Goldsmith Channel",
25176   "\t74\t-108\t-104",
25177   "\t73\t-108\t-104",
25178   "\t72\t-108\t-104",
25179   "\t71\t-106\t-104",
25180   "Golfe du Lion",
25181   "\t44\t2\t6",
25182   "\t43\t2\t6",
25183   "\t42\t2\t6",
25184   "\t41\t2\t5",
25185   "\t40\t2\t4",
25186   "Golfo Corcovado",
25187   "\t-40\t-74\t-71",
25188   "\t-41\t-74\t-71",
25189   "\t-42\t-74\t-71",
25190   "\t-43\t-75\t-71",
25191   "\t-44\t-75\t-71",
25192   "\t-45\t-75\t-71",
25193   "\t-46\t-74\t-71",
25194   "Golfo de California",
25195   "\t32\t-115\t-112",
25196   "\t31\t-115\t-111",
25197   "\t30\t-115\t-111",
25198   "\t29\t-115\t-110",
25199   "\t28\t-115\t-108",
25200   "\t27\t-114\t-108",
25201   "\t26\t-113\t-107",
25202   "\t25\t-113\t-106",
25203   "\t24\t-112\t-105",
25204   "\t23\t-111\t-105",
25205   "\t22\t-110\t-105",
25206   "Golfo de Guayaquil",
25207   "\t-1\t-81\t-78",
25208   "\t-2\t-81\t-78",
25209   "\t-3\t-81\t-78",
25210   "\t-4\t-81\t-78",
25211   "Golfo de Panama",
25212   "\t10\t-80\t-78",
25213   "\t9\t-81\t-76",
25214   "\t8\t-81\t-76",
25215   "\t7\t-81\t-76",
25216   "\t6\t-81\t-77",
25217   "Golfo de Penas",
25218   "\t-45\t-76\t-73",
25219   "\t-46\t-76\t-73",
25220   "\t-47\t-76\t-73",
25221   "\t-48\t-76\t-73",
25222   "Golfo de Tehuantepec",
25223   "\t17\t-96\t-92",
25224   "\t16\t-97\t-92",
25225   "\t15\t-97\t-92",
25226   "\t14\t-97\t-92",
25227   "Golfo de Uraba",
25228   "\t9\t-78\t-75",
25229   "\t8\t-78\t-75",
25230   "\t7\t-78\t-75",
25231   "\t6\t-77\t-75",
25232   "Golfo San Jorge",
25233   "\t-43\t-67\t-65",
25234   "\t-44\t-68\t-64",
25235   "\t-45\t-68\t-64",
25236   "\t-46\t-68\t-64",
25237   "\t-47\t-68\t-64",
25238   "\t-48\t-67\t-64",
25239   "Golfo San Matias",
25240   "\t-39\t-66\t-63",
25241   "\t-40\t-66\t-62",
25242   "\t-41\t-66\t-62",
25243   "\t-42\t-66\t-62",
25244   "\t-43\t-65\t-62",
25245   "Great Australian Bight",
25246   "\t-30\t127\t133",
25247   "\t-31\t123\t135",
25248   "\t-32\t118\t136",
25249   "\t-33\t117\t136",
25250   "\t-34\t117\t140",
25251   "\t-35\t117\t140",
25252   "\t-36\t117\t141",
25253   "\t-37\t119\t144",
25254   "\t-38\t123\t144",
25255   "\t-39\t126\t145",
25256   "\t-40\t129\t146",
25257   "\t-41\t133\t146",
25258   "\t-42\t136\t147",
25259   "\t-43\t139\t147",
25260   "\t-44\t143\t147",
25261   "Great Barrier Reef",
25262   "\t-8\t141\t146",
25263   "\t-9\t141\t146",
25264   "\t-10\t141\t146",
25265   "\t-11\t141\t146",
25266   "\t-12\t141\t147",
25267   "\t-13\t142\t148",
25268   "\t-14\t142\t148",
25269   "\t-15\t142\t148",
25270   "\t-16\t144\t149",
25271   "\t-17\t144\t150",
25272   "\t-18\t144\t151",
25273   "\t-19\t145\t151",
25274   "\t-20\t145\t152",
25275   "\t-21\t147\t154",
25276   "\t-22\t148\t154",
25277   "\t-23\t148\t154",
25278   "\t-24\t149\t154",
25279   "\t-25\t150\t154",
25280   "\t-26\t151\t154",
25281   "Great Bear Lake",
25282   "\t68\t-121\t-118",
25283   "\t67\t-126\t-116",
25284   "\t66\t-126\t-116",
25285   "\t65\t-126\t-116",
25286   "\t64\t-125\t-116",
25287   "\t63\t-123\t-119",
25288   "Great Salt Lake",
25289   "\t42\t-114\t-110",
25290   "\t41\t-114\t-110",
25291   "\t40\t-114\t-110",
25292   "\t39\t-113\t-110",
25293   "Great Slave Lake",
25294   "\t63\t-117\t-108",
25295   "\t62\t-118\t-108",
25296   "\t61\t-118\t-108",
25297   "\t60\t-118\t-110",
25298   "\t59\t-117\t-113",
25299   "Greenland Sea",
25300   "\t84\t-32\t-17",
25301   "\t83\t-33\t-3",
25302   "\t82\t-33\t11",
25303   "\t81\t-33\t18",
25304   "\t80\t-30\t-27\t-25\t18",
25305   "\t79\t-24\t18",
25306   "\t78\t-22\t18",
25307   "\t77\t-23\t18",
25308   "\t76\t-23\t18",
25309   "\t75\t-23\t17",
25310   "\t74\t-28\t14",
25311   "\t73\t-28\t10",
25312   "\t72\t-28\t5",
25313   "\t71\t-27\t0",
25314   "\t70\t-26\t-4",
25315   "\t69\t-29\t-7",
25316   "\t68\t-29\t-9",
25317   "\t67\t-29\t-10",
25318   "\t66\t-27\t-10",
25319   "\t65\t-25\t-11",
25320   "\t64\t-24\t-12",
25321   "Guba Gusinaya",
25322   "\t73\t144\t148",
25323   "\t72\t143\t148",
25324   "\t71\t143\t148",
25325   "\t70\t143\t147",
25326   "Gulf of Aden",
25327   "\t16\t49\t52",
25328   "\t15\t46\t52",
25329   "\t14\t44\t52",
25330   "\t13\t42\t52",
25331   "\t12\t41\t52",
25332   "\t11\t41\t52",
25333   "\t10\t41\t52",
25334   "\t9\t42\t47",
25335   "Gulf of Alaska",
25336   "\t61\t-150\t-138",
25337   "\t60\t-152\t-137",
25338   "\t59\t-156\t-135",
25339   "\t58\t-157\t-135",
25340   "\t57\t-159\t-135",
25341   "\t56\t-164\t-139",
25342   "\t55\t-164\t-145",
25343   "\t54\t-164\t-152",
25344   "\t53\t-164\t-158",
25345   "Gulf of Anadyr",
25346   "\t67\t-180\t-177",
25347   "\t66\t-180\t-174",
25348   "\t65\t-180\t-172",
25349   "\t64\t-180\t-172",
25350   "\t63\t-180\t-172",
25351   "\t62\t-180\t-174",
25352   "\t61\t-180\t-176",
25353   "\t60\t-180\t-178",
25354   "Gulf of Anadyr",
25355   "\t66\t175\t180",
25356   "\t65\t173\t180",
25357   "\t64\t173\t180",
25358   "\t63\t173\t180",
25359   "\t62\t177\t180",
25360   "\t61\t178\t180",
25361   "Gulf of Aqaba",
25362   "\t30\t33\t35",
25363   "\t29\t33\t35",
25364   "\t28\t33\t35",
25365   "\t27\t33\t35",
25366   "\t26\t33\t35",
25367   "Gulf of Boothia",
25368   "\t72\t-90\t-88",
25369   "\t71\t-93\t-84",
25370   "\t70\t-93\t-83",
25371   "\t69\t-93\t-83",
25372   "\t68\t-93\t-83",
25373   "\t67\t-91\t-83",
25374   "\t66\t-89\t-85",
25375   "Gulf of Bothnia",
25376   "\t66\t20\t26",
25377   "\t65\t20\t26",
25378   "\t64\t17\t26",
25379   "\t63\t16\t26",
25380   "\t62\t16\t24",
25381   "\t61\t16\t24",
25382   "\t60\t16\t24",
25383   "\t59\t16\t24",
25384   "\t58\t17\t24",
25385   "Gulf of Buli",
25386   "\t2\t127\t130",
25387   "\t1\t127\t130",
25388   "\t0\t127\t130",
25389   "\t-1\t127\t130",
25390   "Gulf of Carpentaria",
25391   "\t-11\t135\t142",
25392   "\t-12\t134\t142",
25393   "\t-13\t134\t142",
25394   "\t-14\t134\t142",
25395   "\t-15\t134\t142",
25396   "\t-16\t134\t142",
25397   "\t-17\t136\t142",
25398   "\t-18\t138\t141",
25399   "Gulf of Finland",
25400   "\t61\t23\t31",
25401   "\t60\t21\t31",
25402   "\t59\t21\t31",
25403   "\t58\t21\t31",
25404   "Gulf of Gabes",
25405   "\t36\t9\t12",
25406   "\t35\t9\t12",
25407   "\t34\t9\t12",
25408   "\t33\t9\t12",
25409   "\t32\t9\t11",
25410   "Gulf of Guinea",
25411   "\t6\t-6\t3",
25412   "\t5\t-8\t8",
25413   "\t4\t-8\t9",
25414   "\t3\t-8\t10",
25415   "\t2\t-6\t10",
25416   "\t1\t-3\t11",
25417   "\t0\t1\t11",
25418   "\t-1\t4\t11",
25419   "Gulf of Honduras",
25420   "\t18\t-89\t-87",
25421   "\t17\t-89\t-86",
25422   "\t16\t-89\t-85",
25423   "\t15\t-89\t-85",
25424   "\t14\t-89\t-85",
25425   "Gulf of Kamchatka",
25426   "\t57\t161\t164",
25427   "\t56\t160\t164",
25428   "\t55\t160\t164",
25429   "\t54\t160\t164",
25430   "\t53\t160\t163",
25431   "Gulf of Kau",
25432   "\t3\t127\t129",
25433   "\t2\t126\t130",
25434   "\t1\t126\t130",
25435   "\t0\t126\t130",
25436   "\t-1\t126\t128",
25437   "Gulf of Khambhat",
25438   "\t23\t71\t73",
25439   "\t22\t71\t74",
25440   "\t21\t69\t74",
25441   "\t20\t69\t74",
25442   "\t19\t69\t73",
25443   "\t18\t71\t73",
25444   "Gulf of Kutch",
25445   "\t24\t67\t71",
25446   "\t23\t67\t71",
25447   "\t22\t67\t71",
25448   "\t21\t67\t71",
25449   "Gulf of Maine",
25450   "\t45\t-70\t-65",
25451   "\t44\t-71\t-64",
25452   "\t43\t-71\t-64",
25453   "\t42\t-71\t-64",
25454   "\t41\t-71\t-65",
25455   "\t40\t-70\t-67",
25456   "Gulf of Mannar",
25457   "\t10\t77\t80",
25458   "\t9\t76\t80",
25459   "\t8\t76\t80",
25460   "\t7\t76\t80",
25461   "\t6\t78\t80",
25462   "Gulf of Martaban",
25463   "\t18\t95\t98",
25464   "\t17\t94\t98",
25465   "\t16\t94\t98",
25466   "\t15\t94\t98",
25467   "\t14\t94\t98",
25468   "\t13\t96\t98",
25469   "Gulf of Masira",
25470   "\t21\t56\t59",
25471   "\t20\t56\t59",
25472   "\t19\t56\t59",
25473   "\t18\t56\t58",
25474   "Gulf of Mexico",
25475   "\t31\t-90\t-83",
25476   "\t30\t-96\t-81",
25477   "\t29\t-98\t-81",
25478   "\t28\t-98\t-81",
25479   "\t27\t-98\t-80",
25480   "\t26\t-98\t-79",
25481   "\t25\t-98\t-78",
25482   "\t24\t-98\t-78",
25483   "\t23\t-98\t-78",
25484   "\t22\t-98\t-82",
25485   "\t21\t-98\t-82",
25486   "\t20\t-98\t-83",
25487   "\t19\t-98\t-93",
25488   "Gulf of Ob",
25489   "\t73\t71\t76",
25490   "\t72\t70\t76",
25491   "\t71\t70\t76",
25492   "\t70\t70\t77",
25493   "\t69\t71\t78",
25494   "\t68\t70\t79",
25495   "\t67\t68\t79",
25496   "\t66\t68\t79",
25497   "\t65\t68\t74",
25498   "Gulf of Olenek",
25499   "\t74\t117\t124",
25500   "\t73\t117\t124",
25501   "\t72\t117\t124",
25502   "\t71\t118\t124",
25503   "Gulf of Oman",
25504   "\t27\t55\t58",
25505   "\t26\t55\t62",
25506   "\t25\t55\t62",
25507   "\t24\t55\t62",
25508   "\t23\t55\t61",
25509   "\t22\t56\t61",
25510   "\t21\t58\t60",
25511   "Gulf of Papua",
25512   "\t-6\t142\t146",
25513   "\t-7\t141\t147",
25514   "\t-8\t141\t147",
25515   "\t-9\t141\t147",
25516   "Gulf of Riga",
25517   "\t60\t22\t24",
25518   "\t59\t21\t25",
25519   "\t58\t20\t25",
25520   "\t57\t20\t25",
25521   "\t56\t20\t25",
25522   "\t55\t22\t24",
25523   "Gulf of Sakhalin",
25524   "\t55\t138\t143",
25525   "\t54\t138\t143",
25526   "\t53\t138\t143",
25527   "\t52\t139\t143",
25528   "Gulf of Sidra",
25529   "\t33\t14\t20",
25530   "\t32\t14\t21",
25531   "\t31\t14\t21",
25532   "\t30\t14\t21",
25533   "\t29\t16\t21",
25534   "Gulf of St. Lawrence",
25535   "\t52\t-59\t-55",
25536   "\t51\t-65\t-55",
25537   "\t50\t-65\t-55",
25538   "\t49\t-67\t-56",
25539   "\t48\t-67\t-53",
25540   "\t47\t-67\t-53",
25541   "\t46\t-67\t-53",
25542   "\t45\t-65\t-54",
25543   "\t44\t-64\t-60",
25544   "Gulf of Suez",
25545   "\t30\t31\t34",
25546   "\t29\t31\t34",
25547   "\t28\t31\t35",
25548   "\t27\t31\t35",
25549   "\t26\t32\t35",
25550   "Gulf of Thailand",
25551   "\t14\t98\t101",
25552   "\t13\t98\t103",
25553   "\t12\t98\t104",
25554   "\t11\t98\t106",
25555   "\t10\t98\t106",
25556   "\t9\t98\t106",
25557   "\t8\t98\t106",
25558   "\t7\t98\t105",
25559   "\t6\t99\t104",
25560   "\t5\t99\t103",
25561   "Gulf of Tomini",
25562   "\t1\t119\t124",
25563   "\t0\t119\t124",
25564   "\t-1\t119\t124",
25565   "\t-2\t119\t122",
25566   "Gulf of Tonkin",
25567   "\t22\t105\t110",
25568   "\t21\t105\t111",
25569   "\t20\t104\t111",
25570   "\t19\t104\t111",
25571   "\t18\t104\t111",
25572   "\t17\t104\t109",
25573   "\t16\t105\t108",
25574   "Gulf of Yana",
25575   "\t76\t135\t138",
25576   "\t75\t135\t141",
25577   "\t74\t135\t141",
25578   "\t73\t133\t142",
25579   "\t72\t131\t142",
25580   "\t71\t131\t142",
25581   "\t70\t131\t140",
25582   "Gulf St. Vincent",
25583   "\t-31\t136\t138",
25584   "\t-32\t135\t138",
25585   "\t-33\t134\t139",
25586   "\t-34\t134\t139",
25587   "\t-35\t134\t139",
25588   "\t-36\t135\t139",
25589   "Hadley Bay",
25590   "\t74\t-109\t-107",
25591   "\t73\t-109\t-106",
25592   "\t72\t-109\t-106",
25593   "\t71\t-109\t-106",
25594   "\t70\t-109\t-106",
25595   "Hall Basin",
25596   "\t83\t-63\t-61",
25597   "\t82\t-69\t-60",
25598   "\t81\t-69\t-60",
25599   "\t80\t-69\t-60",
25600   "\t79\t-64\t-62",
25601   "Halmahera Sea",
25602   "\t1\t126\t131",
25603   "\t0\t126\t132",
25604   "\t-1\t126\t132",
25605   "\t-2\t126\t132",
25606   "Hamilton Inlet",
25607   "\t55\t-59\t-56",
25608   "\t54\t-61\t-56",
25609   "\t53\t-61\t-56",
25610   "\t52\t-61\t-57",
25611   "Hangzhou Bay",
25612   "\t31\t119\t123",
25613   "\t30\t119\t123",
25614   "\t29\t119\t123",
25615   "\t28\t120\t123",
25616   "Hecate Straight",
25617   "\t56\t-133\t-129",
25618   "\t55\t-133\t-128",
25619   "\t54\t-133\t-128",
25620   "\t53\t-133\t-128",
25621   "\t52\t-133\t-128",
25622   "\t51\t-132\t-129",
25623   "Hudson Bay",
25624   "\t67\t-87\t-84",
25625   "\t66\t-88\t-84",
25626   "\t65\t-94\t-81",
25627   "\t64\t-94\t-78",
25628   "\t63\t-94\t-77",
25629   "\t62\t-95\t-76",
25630   "\t61\t-95\t-76",
25631   "\t60\t-95\t-76",
25632   "\t59\t-95\t-76",
25633   "\t58\t-95\t-75",
25634   "\t57\t-95\t-75",
25635   "\t56\t-93\t-75",
25636   "\t55\t-93\t-75",
25637   "\t54\t-88\t-75",
25638   "\t53\t-83\t-77",
25639   "Hudson Strait",
25640   "\t65\t-79\t-71",
25641   "\t64\t-81\t-69",
25642   "\t63\t-81\t-64",
25643   "\t62\t-81\t-63",
25644   "\t61\t-79\t-63",
25645   "\t60\t-73\t-63",
25646   "\t59\t-71\t-63",
25647   "IJsselmeer",
25648   "\t54\t4\t6",
25649   "\t53\t3\t6",
25650   "\t52\t3\t6",
25651   "\t51\t3\t6",
25652   "Indian Ocean",
25653   "\t11\t49\t53",
25654   "\t10\t49\t55",
25655   "\t9\t49\t57",
25656   "\t8\t48\t59\t80\t85",
25657   "\t7\t48\t61\t79\t92",
25658   "\t6\t47\t64\t78\t96",
25659   "\t5\t46\t66\t77\t97",
25660   "\t4\t45\t68\t75\t98",
25661   "\t3\t44\t70\t74\t99",
25662   "\t2\t43\t99",
25663   "\t1\t40\t101",
25664   "\t0\t39\t101",
25665   "\t-1\t39\t102",
25666   "\t-2\t38\t103",
25667   "\t-3\t38\t104",
25668   "\t-4\t37\t105",
25669   "\t-5\t37\t107",
25670   "\t-6\t37\t111",
25671   "\t-7\t37\t119",
25672   "\t-8\t38\t120",
25673   "\t-9\t38\t123",
25674   "\t-10\t38\t125",
25675   "\t-11\t38\t126",
25676   "\t-12\t43\t127",
25677   "\t-13\t48\t127",
25678   "\t-14\t48\t127",
25679   "\t-15\t48\t127",
25680   "\t-16\t48\t126",
25681   "\t-17\t48\t125",
25682   "\t-18\t47\t124",
25683   "\t-19\t47\t123",
25684   "\t-20\t47\t122",
25685   "\t-21\t46\t120",
25686   "\t-22\t46\t117",
25687   "\t-23\t46\t115",
25688   "\t-24\t38\t114",
25689   "\t-25\t31\t114",
25690   "\t-26\t31\t115",
25691   "\t-27\t30\t115",
25692   "\t-28\t30\t115",
25693   "\t-29\t29\t116",
25694   "\t-30\t28\t116",
25695   "\t-31\t27\t116",
25696   "\t-32\t22\t116",
25697   "\t-33\t18\t117",
25698   "\t-34\t18\t120",
25699   "\t-35\t18\t124",
25700   "\t-36\t18\t127",
25701   "\t-37\t18\t130",
25702   "\t-38\t18\t134",
25703   "\t-39\t18\t137",
25704   "\t-40\t18\t140",
25705   "\t-41\t18\t144",
25706   "\t-42\t18\t148",
25707   "\t-43\t18\t151",
25708   "\t-44\t18\t153",
25709   "\t-45\t18\t156",
25710   "\t-46\t18\t159",
25711   "\t-47\t18\t161",
25712   "\t-48\t18\t164",
25713   "\t-49\t18\t167",
25714   "\t-50\t18\t167",
25715   "\t-51\t18\t167",
25716   "\t-52\t18\t167",
25717   "\t-53\t18\t167",
25718   "\t-54\t18\t167",
25719   "\t-55\t18\t167",
25720   "\t-56\t18\t167",
25721   "\t-57\t18\t167",
25722   "\t-58\t18\t167",
25723   "\t-59\t18\t167",
25724   "\t-60\t18\t167",
25725   "\t-61\t18\t167",
25726   "Inner Sea",
25727   "\t35\t129\t136",
25728   "\t34\t129\t136",
25729   "\t33\t129\t136",
25730   "\t32\t129\t136",
25731   "\t31\t130\t133",
25732   "Inner Seas",
25733   "\t59\t-7\t-4",
25734   "\t58\t-8\t-4",
25735   "\t57\t-8\t-3",
25736   "\t56\t-9\t-3",
25737   "\t55\t-9\t-3",
25738   "\t54\t-9\t-3",
25739   "\t53\t-8\t-4",
25740   "Internal Canada (B.C.) Waters",
25741   "\t54\t-130\t-126",
25742   "\t53\t-130\t-126",
25743   "\t52\t-130\t-126",
25744   "\t51\t-130\t-126",
25745   "Internal Canada (B.C.) Waters",
25746   "\t55\t-133\t-131",
25747   "\t54\t-133\t-131",
25748   "\t53\t-133\t-131",
25749   "\t52\t-133\t-131",
25750   "Internal Canada (B.C.) Waters",
25751   "\t56\t-131\t-128",
25752   "\t55\t-131\t-128",
25753   "\t54\t-131\t-128",
25754   "\t53\t-131\t-129",
25755   "Internal Canada Arctic Waters",
25756   "\t72\t-119\t-116",
25757   "\t71\t-119\t-116",
25758   "\t70\t-119\t-116",
25759   "Internal Canada Arctic Waters",
25760   "\t69\t-108\t-104",
25761   "\t68\t-108\t-104",
25762   "\t67\t-108\t-104",
25763   "Internal Canada Arctic Waters",
25764   "\t71\t-82\t-79",
25765   "\t70\t-82\t-78",
25766   "\t69\t-82\t-78",
25767   "\t68\t-82\t-78",
25768   "Internal Denmark Waters",
25769   "\t55\t9\t13",
25770   "\t54\t9\t13",
25771   "\t53\t9\t13",
25772   "\t52\t9\t12",
25773   "Internal Philippines Waters",
25774   "\t11\t124\t126",
25775   "\t10\t124\t126",
25776   "\t9\t124\t126",
25777   "\t8\t124\t126",
25778   "Internal Philippines Waters",
25779   "\t14\t121\t124",
25780   "\t13\t121\t124",
25781   "\t12\t121\t124",
25782   "\t11\t122\t124",
25783   "Internal U.S. (Alaska) Waters",
25784   "\t60\t-138\t-134",
25785   "\t59\t-138\t-132",
25786   "\t58\t-138\t-131",
25787   "\t57\t-138\t-130",
25788   "\t56\t-136\t-129",
25789   "\t55\t-135\t-129",
25790   "\t54\t-134\t-129",
25791   "Ionian Sea",
25792   "\t41\t15\t18",
25793   "\t40\t15\t22",
25794   "\t39\t14\t24",
25795   "\t38\t14\t24",
25796   "\t37\t14\t24",
25797   "\t36\t14\t23",
25798   "\t35\t14\t23",
25799   "Irish Sea",
25800   "\t55\t-7\t-1",
25801   "\t54\t-7\t-1",
25802   "\t53\t-7\t-1",
25803   "\t52\t-7\t-1",
25804   "\t51\t-7\t-2",
25805   "\t50\t-7\t-4",
25806   "James Bay",
25807   "\t55\t-83\t-77",
25808   "\t54\t-83\t-77",
25809   "\t53\t-83\t-77",
25810   "\t52\t-83\t-77",
25811   "\t51\t-83\t-77",
25812   "\t50\t-81\t-77",
25813   "\t49\t-80\t-78",
25814   "Java Sea",
25815   "\t-1\t105\t114",
25816   "\t-2\t104\t117",
25817   "\t-3\t104\t119",
25818   "\t-4\t103\t120",
25819   "\t-5\t103\t120",
25820   "\t-6\t103\t120",
25821   "\t-7\t104\t119",
25822   "\t-8\t111\t118",
25823   "Jones Sound",
25824   "\t77\t-92\t-77",
25825   "\t76\t-92\t-77",
25826   "\t75\t-92\t-77",
25827   "\t74\t-91\t-78",
25828   "Joseph Bonaparte Gulf",
25829   "\t-12\t126\t130",
25830   "\t-13\t126\t130",
25831   "\t-14\t126\t130",
25832   "\t-15\t126\t130",
25833   "\t-16\t127\t130",
25834   "Kaliningrad",
25835   "\t56\t19\t22",
25836   "\t55\t19\t22",
25837   "\t54\t19\t22",
25838   "\t53\t19\t22",
25839   "Kane Basin",
25840   "\t81\t-73\t-63",
25841   "\t80\t-79\t-63",
25842   "\t79\t-79\t-63",
25843   "\t78\t-79\t-63",
25844   "\t77\t-79\t-67",
25845   "Kangertittivaq",
25846   "\t72\t-29\t-23",
25847   "\t71\t-30\t-20",
25848   "\t70\t-30\t-20",
25849   "\t69\t-30\t-20",
25850   "\t68\t-28\t-26",
25851   "Kara Sea",
25852   "\t82\t64\t96",
25853   "\t81\t64\t98",
25854   "\t80\t64\t103",
25855   "\t79\t64\t103",
25856   "\t78\t65\t103",
25857   "\t77\t65\t102",
25858   "\t76\t59\t102",
25859   "\t75\t56\t102",
25860   "\t74\t55\t100",
25861   "\t73\t54\t88",
25862   "\t72\t54\t88",
25863   "\t71\t54\t80",
25864   "\t70\t54\t69\t74\t80",
25865   "\t69\t55\t70\t77\t80",
25866   "\t68\t59\t70",
25867   "\t67\t65\t70",
25868   "Karaginskiy Gulf",
25869   "\t61\t162\t167",
25870   "\t60\t161\t167",
25871   "\t59\t160\t167",
25872   "\t58\t160\t167",
25873   "\t57\t160\t166",
25874   "\t56\t161\t164",
25875   "Karskiye Strait",
25876   "\t71\t56\t60",
25877   "\t70\t56\t60",
25878   "\t69\t56\t60",
25879   "Kattegat",
25880   "\t59\t10\t12",
25881   "\t58\t9\t13",
25882   "\t57\t9\t13",
25883   "\t56\t9\t13",
25884   "\t55\t9\t13",
25885   "\t54\t10\t12",
25886   "Kennedy Channel",
25887   "\t82\t-67\t-63",
25888   "\t81\t-68\t-63",
25889   "\t80\t-68\t-63",
25890   "\t79\t-68\t-63",
25891   "Khatanga Gulf",
25892   "\t76\t111\t114",
25893   "\t75\t108\t114",
25894   "\t74\t105\t114",
25895   "\t73\t104\t114",
25896   "\t72\t104\t113",
25897   "\t71\t104\t107",
25898   "Korea Strait",
25899   "\t37\t128\t131",
25900   "\t36\t126\t133",
25901   "\t35\t125\t133",
25902   "\t34\t125\t133",
25903   "\t33\t125\t133",
25904   "\t32\t125\t131",
25905   "\t31\t126\t130",
25906   "Kotzebue Sound",
25907   "\t68\t-164\t-160",
25908   "\t67\t-165\t-159",
25909   "\t66\t-165\t-159",
25910   "\t65\t-165\t-159",
25911   "Kronotskiy Gulf",
25912   "\t55\t158\t162",
25913   "\t54\t158\t162",
25914   "\t53\t158\t162",
25915   "\t52\t158\t161",
25916   "La Perouse Strait",
25917   "\t47\t140\t142",
25918   "\t46\t140\t143",
25919   "\t45\t140\t143",
25920   "\t44\t140\t143",
25921   "Labrador Sea",
25922   "\t61\t-65\t-43",
25923   "\t60\t-65\t-42",
25924   "\t59\t-65\t-42",
25925   "\t58\t-65\t-42",
25926   "\t57\t-64\t-43",
25927   "\t56\t-63\t-43",
25928   "\t55\t-63\t-44",
25929   "\t54\t-62\t-45",
25930   "\t53\t-60\t-46",
25931   "\t52\t-58\t-46",
25932   "\t51\t-57\t-47",
25933   "\t50\t-57\t-48",
25934   "\t49\t-57\t-49",
25935   "\t48\t-57\t-49",
25936   "\t47\t-55\t-50",
25937   "\t46\t-54\t-51",
25938   "Laccadive Sea",
25939   "\t15\t73\t75",
25940   "\t14\t70\t75",
25941   "\t13\t70\t76",
25942   "\t12\t70\t76",
25943   "\t11\t70\t77",
25944   "\t10\t70\t77",
25945   "\t9\t70\t79",
25946   "\t8\t70\t80",
25947   "\t7\t70\t81",
25948   "\t6\t70\t81",
25949   "\t5\t71\t81",
25950   "\t4\t71\t80",
25951   "\t3\t71\t79",
25952   "\t2\t71\t78",
25953   "\t1\t71\t76",
25954   "\t0\t71\t75",
25955   "\t-1\t71\t74",
25956   "Lago de Maracaibo",
25957   "\t11\t-72\t-70",
25958   "\t10\t-73\t-70",
25959   "\t9\t-73\t-70",
25960   "\t8\t-73\t-70",
25961   "Lake Baikal",
25962   "\t56\t107\t110",
25963   "\t55\t107\t110",
25964   "\t54\t105\t110",
25965   "\t53\t104\t110",
25966   "\t52\t102\t110",
25967   "\t51\t102\t109",
25968   "\t50\t102\t107",
25969   "Lake Chad",
25970   "\t14\t13\t15",
25971   "\t13\t13\t15",
25972   "\t12\t13\t15",
25973   "\t11\t13\t15",
25974   "Lake Champlain",
25975   "\t46\t-74\t-72",
25976   "\t45\t-74\t-72",
25977   "\t44\t-74\t-72",
25978   "\t43\t-74\t-72",
25979   "\t42\t-74\t-72",
25980   "Lake Erie",
25981   "\t44\t-80\t-77",
25982   "\t43\t-84\t-77",
25983   "\t42\t-84\t-77",
25984   "\t41\t-84\t-77",
25985   "\t40\t-84\t-79",
25986   "Lake Huron",
25987   "\t47\t-82\t-80",
25988   "\t46\t-82\t-78",
25989   "\t45\t-82\t-78",
25990   "\t44\t-82\t-78",
25991   "\t43\t-82\t-78",
25992   "Lake Huron",
25993   "\t47\t-85\t-80",
25994   "\t46\t-85\t-78",
25995   "\t45\t-85\t-78",
25996   "\t44\t-85\t-78",
25997   "\t43\t-84\t-78",
25998   "\t42\t-84\t-80",
25999   "Lake Huron",
26000   "\t47\t-84\t-80",
26001   "\t46\t-84\t-80",
26002   "\t45\t-84\t-80",
26003   "\t44\t-84\t-80",
26004   "Lake Huron",
26005   "\t45\t-84\t-82",
26006   "\t44\t-84\t-82",
26007   "\t43\t-84\t-82",
26008   "\t42\t-84\t-82",
26009   "Lake Malawi",
26010   "\t-8\t32\t35",
26011   "\t-9\t32\t35",
26012   "\t-10\t32\t35",
26013   "\t-11\t32\t35",
26014   "\t-12\t33\t36",
26015   "\t-13\t33\t36",
26016   "\t-14\t33\t36",
26017   "\t-15\t33\t36",
26018   "Lake Michigan",
26019   "\t47\t-86\t-84",
26020   "\t46\t-88\t-83",
26021   "\t45\t-89\t-83",
26022   "\t44\t-89\t-83",
26023   "\t43\t-89\t-84",
26024   "\t42\t-88\t-85",
26025   "\t41\t-88\t-85",
26026   "\t40\t-88\t-85",
26027   "Lake Okeechobee",
26028   "\t28\t-82\t-79",
26029   "\t27\t-82\t-79",
26030   "\t26\t-82\t-79",
26031   "\t25\t-82\t-79",
26032   "Lake Ontario",
26033   "\t45\t-78\t-74",
26034   "\t44\t-80\t-74",
26035   "\t43\t-80\t-74",
26036   "\t42\t-80\t-75",
26037   "Lake Pontchartrain",
26038   "\t31\t-91\t-88",
26039   "\t30\t-91\t-88",
26040   "\t29\t-91\t-88",
26041   "Lake Saint Clair",
26042   "\t43\t-84\t-81",
26043   "\t42\t-84\t-81",
26044   "\t41\t-84\t-81",
26045   "Lake Shasta",
26046   "\t41\t-123\t-121",
26047   "\t40\t-123\t-121",
26048   "\t39\t-123\t-121",
26049   "Lake Superior",
26050   "\t50\t-89\t-87",
26051   "\t49\t-90\t-84",
26052   "\t48\t-92\t-83",
26053   "\t47\t-93\t-83",
26054   "\t46\t-93\t-83",
26055   "\t45\t-93\t-83",
26056   "Lake Superior",
26057   "\t48\t-85\t-83",
26058   "\t47\t-86\t-83",
26059   "\t46\t-86\t-83",
26060   "\t45\t-86\t-83",
26061   "Lake Tahoe",
26062   "\t40\t-121\t-118",
26063   "\t39\t-121\t-118",
26064   "\t38\t-121\t-118",
26065   "\t37\t-121\t-118",
26066   "Lake Tanganyika",
26067   "\t-2\t28\t30",
26068   "\t-3\t28\t30",
26069   "\t-4\t28\t30",
26070   "\t-5\t28\t31",
26071   "\t-6\t28\t31",
26072   "\t-7\t28\t32",
26073   "\t-8\t28\t32",
26074   "\t-9\t29\t32",
26075   "Lake Victoria",
26076   "\t1\t30\t35",
26077   "\t0\t30\t35",
26078   "\t-1\t30\t35",
26079   "\t-2\t30\t35",
26080   "\t-3\t30\t34",
26081   "\t-4\t31\t33",
26082   "Lake Winnipeg",
26083   "\t55\t-99\t-96",
26084   "\t54\t-100\t-96",
26085   "\t53\t-100\t-95",
26086   "\t52\t-100\t-95",
26087   "\t51\t-99\t-95",
26088   "\t50\t-99\t-95",
26089   "\t49\t-97\t-95",
26090   "Laptev Sea",
26091   "\t82\t95\t101",
26092   "\t81\t95\t109",
26093   "\t80\t95\t118",
26094   "\t79\t95\t126",
26095   "\t78\t96\t134",
26096   "\t77\t101\t139",
26097   "\t76\t103\t139",
26098   "\t75\t104\t139",
26099   "\t74\t111\t138",
26100   "\t73\t111\t137",
26101   "\t72\t111\t137",
26102   "\t71\t112\t114\t126\t136",
26103   "\t70\t127\t134",
26104   "\t69\t129\t132",
26105   "Leyte Gulf",
26106   "\t12\t124\t126",
26107   "\t11\t124\t126",
26108   "\t10\t124\t126",
26109   "\t9\t124\t126",
26110   "Liddon Gulf",
26111   "\t76\t-115\t-110",
26112   "\t75\t-116\t-110",
26113   "\t74\t-116\t-110",
26114   "\t73\t-116\t-111",
26115   "Ligurian Sea",
26116   "\t45\t7\t10",
26117   "\t44\t6\t10",
26118   "\t43\t6\t10",
26119   "\t42\t6\t10",
26120   "Lincoln Sea",
26121   "\t84\t-70\t-36",
26122   "\t83\t-70\t-36",
26123   "\t82\t-70\t-36",
26124   "\t81\t-69\t-37",
26125   "\t80\t-54\t-48\t-46\t-43",
26126   "Long Island Sound",
26127   "\t42\t-74\t-71",
26128   "\t41\t-74\t-71",
26129   "\t40\t-74\t-71",
26130   "\t39\t-74\t-71",
26131   "Lutzow-Holm Bay",
26132   "\t-67\t32\t41",
26133   "\t-68\t32\t41",
26134   "\t-69\t32\t41",
26135   "\t-70\t32\t40",
26136   "\t-71\t37\t39",
26137   "Luzon Strait",
26138   "\t23\t119\t121",
26139   "\t22\t119\t122",
26140   "\t21\t119\t123",
26141   "\t20\t119\t123",
26142   "\t19\t119\t123",
26143   "\t18\t119\t123",
26144   "\t17\t119\t123",
26145   "M'Clure Strait",
26146   "\t77\t-123\t-119",
26147   "\t76\t-124\t-114",
26148   "\t75\t-125\t-113",
26149   "\t74\t-125\t-113",
26150   "\t73\t-125\t-113",
26151   "\t72\t-116\t-114",
26152   "Mackenzie Bay",
26153   "\t70\t-140\t-133",
26154   "\t69\t-140\t-133",
26155   "\t68\t-140\t-133",
26156   "\t67\t-138\t-134",
26157   "Makassar Strait",
26158   "\t2\t116\t122",
26159   "\t1\t116\t122",
26160   "\t0\t115\t122",
26161   "\t-1\t115\t121",
26162   "\t-2\t115\t120",
26163   "\t-3\t115\t120",
26164   "\t-4\t115\t120",
26165   "\t-5\t116\t120",
26166   "\t-6\t118\t120",
26167   "Marguerite Bay",
26168   "\t-66\t-70\t-65",
26169   "\t-67\t-71\t-65",
26170   "\t-68\t-71\t-65",
26171   "\t-69\t-71\t-65",
26172   "\t-70\t-71\t-65",
26173   "Massachusetts Bay",
26174   "\t43\t-72\t-69",
26175   "\t42\t-72\t-69",
26176   "\t41\t-72\t-69",
26177   "\t40\t-71\t-69",
26178   "Matochkin Shar Strait",
26179   "\t74\t53\t57",
26180   "\t73\t53\t57",
26181   "\t72\t53\t57",
26182   "McMurdo Sound",
26183   "\t-71\t165\t167",
26184   "\t-72\t163\t170",
26185   "\t-73\t161\t170",
26186   "\t-74\t159\t170",
26187   "\t-75\t159\t170",
26188   "\t-76\t159\t170",
26189   "\t-77\t161\t170",
26190   "\t-78\t161\t170",
26191   "\t-79\t162\t166",
26192   "Mecklenburger Bucht",
26193   "\t55\t9\t13",
26194   "\t54\t9\t13",
26195   "\t53\t9\t13",
26196   "\t52\t9\t12",
26197   "Mediterranean Sea",
26198   "\t38\t10\t15",
26199   "\t37\t9\t24\t26\t37",
26200   "\t36\t9\t37",
26201   "\t35\t9\t37",
26202   "\t34\t9\t36",
26203   "\t33\t9\t36",
26204   "\t32\t9\t36",
26205   "\t31\t11\t36",
26206   "\t30\t23\t35",
26207   "\t29\t27\t30",
26208   "Mediterranean Sea",
26209   "\t44\t4\t10",
26210   "\t43\t3\t10",
26211   "\t42\t2\t10",
26212   "\t41\t2\t10",
26213   "\t40\t2\t10",
26214   "\t39\t-1\t11",
26215   "\t38\t-2\t13",
26216   "\t37\t-3\t13",
26217   "\t36\t-3\t13",
26218   "\t35\t-3\t11",
26219   "\t34\t-2\t1",
26220   "Melville Bay",
26221   "\t77\t-68\t-59",
26222   "\t76\t-68\t-56",
26223   "\t75\t-68\t-55",
26224   "\t74\t-67\t-55",
26225   "\t73\t-62\t-55",
26226   "Minto Inlet",
26227   "\t72\t-119\t-114",
26228   "\t71\t-119\t-114",
26229   "\t70\t-119\t-114",
26230   "Molucca Sea",
26231   "\t5\t125\t127",
26232   "\t4\t124\t128",
26233   "\t3\t124\t129",
26234   "\t2\t123\t129",
26235   "\t1\t122\t129",
26236   "\t0\t122\t129",
26237   "\t-1\t122\t129",
26238   "\t-2\t122\t128",
26239   "Monterey Bay",
26240   "\t37\t-123\t-120",
26241   "\t36\t-123\t-120",
26242   "\t35\t-123\t-120",
26243   "Mozambique Channel",
26244   "\t-9\t39\t44",
26245   "\t-10\t39\t49",
26246   "\t-11\t39\t50",
26247   "\t-12\t39\t50",
26248   "\t-13\t39\t50",
26249   "\t-14\t39\t49",
26250   "\t-15\t38\t48",
26251   "\t-16\t35\t48",
26252   "\t-17\t34\t46",
26253   "\t-18\t33\t45",
26254   "\t-19\t33\t45",
26255   "\t-20\t33\t45",
26256   "\t-21\t33\t45",
26257   "\t-22\t34\t44",
26258   "\t-23\t33\t45",
26259   "\t-24\t31\t46",
26260   "\t-25\t31\t46",
26261   "\t-26\t31\t46",
26262   "\t-27\t31\t39",
26263   "Murchison Sound",
26264   "\t79\t-73\t-71",
26265   "\t78\t-73\t-65",
26266   "\t77\t-73\t-65",
26267   "\t76\t-73\t-65",
26268   "North Sea",
26269   "\t61\t-2\t7",
26270   "\t60\t-3\t7",
26271   "\t59\t-4\t8",
26272   "\t58\t-5\t10",
26273   "\t57\t-5\t10",
26274   "\t56\t-5\t10",
26275   "\t55\t-4\t10",
26276   "\t54\t-4\t10",
26277   "\t53\t-2\t10",
26278   "\t52\t-1\t10",
26279   "\t51\t-1\t5",
26280   "\t50\t-1\t5",
26281   "\t49\t0\t2",
26282   "Norton Sound",
26283   "\t65\t-165\t-159",
26284   "\t64\t-165\t-159",
26285   "\t63\t-165\t-159",
26286   "\t62\t-165\t-159",
26287   "Norwegian Sea",
26288   "\t77\t13\t19",
26289   "\t76\t9\t21",
26290   "\t75\t4\t23",
26291   "\t74\t-1\t25",
26292   "\t73\t-5\t27",
26293   "\t72\t-9\t28",
26294   "\t71\t-10\t28",
26295   "\t70\t-11\t28",
26296   "\t69\t-12\t28",
26297   "\t68\t-12\t24",
26298   "\t67\t-13\t18",
26299   "\t66\t-14\t15",
26300   "\t65\t-14\t15",
26301   "\t64\t-14\t14",
26302   "\t63\t-14\t12",
26303   "\t62\t-12\t10",
26304   "\t61\t-9\t9",
26305   "\t60\t-7\t7",
26306   "\t59\t-4\t6",
26307   "Oresund",
26308   "\t57\t11\t13",
26309   "\t56\t11\t13",
26310   "\t55\t11\t13",
26311   "\t54\t11\t13",
26312   "Ozero Mogotoyevo",
26313   "\t73\t143\t147",
26314   "\t72\t143\t147",
26315   "\t71\t143\t147",
26316   "Pacific Ocean",
26317   "\t59\t-140\t-135",
26318   "\t58\t-146\t-134",
26319   "\t57\t-153\t-132\t161\t164",
26320   "\t56\t-159\t-132\t161\t166",
26321   "\t55\t-164\t-131\t160\t168",
26322   "\t54\t-167\t-131\t157\t170",
26323   "\t53\t-172\t-130\t157\t172",
26324   "\t52\t-180\t-129\t156\t179",
26325   "\t51\t-180\t-126\t155\t180",
26326   "\t50\t-180\t-123\t154\t180",
26327   "\t49\t-180\t-123\t153\t180",
26328   "\t48\t-180\t-123\t152\t180",
26329   "\t47\t-180\t-122\t150\t180",
26330   "\t46\t-180\t-122\t148\t180",
26331   "\t45\t-180\t-122\t147\t180",
26332   "\t44\t-180\t-122\t143\t180",
26333   "\t43\t-180\t-123\t142\t180",
26334   "\t42\t-180\t-123\t141\t180",
26335   "\t41\t-180\t-123\t140\t180",
26336   "\t40\t-180\t-122\t140\t180",
26337   "\t39\t-180\t-121\t139\t180",
26338   "\t38\t-180\t-121\t139\t180",
26339   "\t37\t-180\t-120\t139\t180",
26340   "\t36\t-180\t-119\t138\t180",
26341   "\t35\t-180\t-117\t138\t180",
26342   "\t34\t-180\t-116\t138\t180",
26343   "\t33\t-180\t-115\t138\t180",
26344   "\t32\t-180\t-115\t138\t180",
26345   "\t31\t-180\t-114\t139\t180",
26346   "\t30\t-180\t-113\t139\t180",
26347   "\t29\t-180\t-113\t139\t180",
26348   "\t28\t-180\t-113\t140\t180",
26349   "\t27\t-180\t-111\t141\t180",
26350   "\t26\t-180\t-111\t141\t180",
26351   "\t25\t-180\t-110\t141\t180",
26352   "\t24\t-180\t-105\t140\t180",
26353   "\t23\t-180\t-104\t140\t180",
26354   "\t22\t-180\t-104\t140\t180",
26355   "\t21\t-180\t-104\t140\t180",
26356   "\t20\t-180\t-103\t140\t180",
26357   "\t19\t-180\t-101\t143\t180",
26358   "\t18\t-180\t-99\t144\t180",
26359   "\t17\t-180\t-97\t145\t180",
26360   "\t16\t-180\t-91\t145\t180",
26361   "\t15\t-180\t-90\t144\t180",
26362   "\t14\t-180\t-86\t144\t180",
26363   "\t13\t-180\t-85\t143\t180",
26364   "\t12\t-180\t-84\t141\t180",
26365   "\t11\t-180\t-84\t140\t180",
26366   "\t10\t-180\t-82\t138\t180",
26367   "\t9\t-180\t-80\t136\t180",
26368   "\t8\t-180\t-76\t135\t180",
26369   "\t7\t-180\t-76\t133\t180",
26370   "\t6\t-180\t-76\t132\t180",
26371   "\t5\t-180\t-76\t130\t180",
26372   "\t4\t-180\t-76\t128\t180",
26373   "\t3\t-180\t-76\t127\t180",
26374   "\t2\t-180\t-76\t127\t180",
26375   "\t1\t-180\t-76\t127\t180",
26376   "\t0\t-180\t-77\t128\t180",
26377   "\t-1\t-180\t-78\t128\t180",
26378   "Pacific Ocean",
26379   "\t4\t171\t173",
26380   "\t3\t-93\t-90\t170\t174",
26381   "\t2\t-93\t-89\t170\t174",
26382   "\t1\t-180\t-79\t130\t180",
26383   "\t0\t-180\t-79\t130\t180",
26384   "\t-1\t-180\t-79\t130\t180",
26385   "\t-2\t-180\t-79\t133\t180",
26386   "\t-3\t-180\t-79\t133\t142\t145\t180",
26387   "\t-4\t-180\t-79\t133\t136\t150\t180",
26388   "\t-5\t-180\t-78\t152\t180",
26389   "\t-6\t-180\t-78\t153\t180",
26390   "\t-7\t-180\t-77\t154\t180",
26391   "\t-8\t-180\t-77\t156\t180",
26392   "\t-9\t-180\t-76\t158\t180",
26393   "\t-10\t-180\t-76\t160\t180",
26394   "\t-11\t-180\t-75\t160\t180",
26395   "\t-12\t-180\t-75\t166\t180",
26396   "\t-13\t-180\t-74\t166\t180",
26397   "\t-14\t-180\t-73\t166\t180",
26398   "\t-15\t-180\t-71\t166\t180",
26399   "\t-16\t-180\t-69\t167\t180",
26400   "\t-17\t-180\t-69\t167\t180",
26401   "\t-18\t-180\t-69\t167\t180",
26402   "\t-19\t-180\t-69\t168\t180",
26403   "\t-20\t-180\t-69\t167\t180",
26404   "\t-21\t-180\t-69\t166\t180",
26405   "\t-22\t-180\t-69\t165\t180",
26406   "\t-23\t-180\t-69\t164\t180",
26407   "\t-24\t-180\t-69\t163\t180",
26408   "\t-25\t-180\t-69\t161\t180",
26409   "\t-26\t-180\t-69\t160\t180",
26410   "\t-27\t-180\t-69\t159\t180",
26411   "\t-28\t-180\t-69\t158\t180",
26412   "\t-29\t-180\t-70\t158\t180",
26413   "\t-30\t-180\t-70\t158\t180",
26414   "\t-31\t-180\t-70\t158\t180",
26415   "\t-32\t-180\t-70\t158\t180",
26416   "\t-33\t-180\t-70\t161\t180",
26417   "\t-34\t-180\t-70\t165\t180",
26418   "\t-35\t-180\t-70\t169\t180",
26419   "\t-36\t-180\t-71\t172\t180",
26420   "\t-37\t-180\t-71\t173\t180",
26421   "\t-38\t-180\t-72\t174\t180",
26422   "\t-39\t-180\t-72\t175\t180",
26423   "\t-40\t-180\t-72\t173\t180",
26424   "\t-41\t-180\t-72\t172\t180",
26425   "\t-42\t-180\t-72\t170\t180",
26426   "\t-43\t-180\t-72\t169\t180",
26427   "\t-44\t-180\t-72\t169\t180",
26428   "\t-45\t-180\t-72\t166\t180",
26429   "\t-46\t-180\t-72\t166\t180",
26430   "\t-47\t-180\t-72\t165\t180",
26431   "\t-48\t-180\t-72\t165\t180",
26432   "\t-49\t-180\t-72\t165\t180",
26433   "\t-50\t-180\t-71\t165\t180",
26434   "\t-51\t-180\t-71\t165\t180",
26435   "\t-52\t-180\t-71\t165\t180",
26436   "\t-53\t-180\t-68\t165\t180",
26437   "\t-54\t-180\t-67\t165\t180",
26438   "\t-55\t-180\t-67\t165\t180",
26439   "\t-56\t-180\t-67\t165\t180",
26440   "\t-57\t-180\t-67\t165\t180",
26441   "\t-58\t-180\t-67\t165\t180",
26442   "\t-59\t-180\t-67\t165\t180",
26443   "\t-60\t-180\t-67\t165\t180",
26444   "\t-61\t-180\t-67\t165\t180",
26445   "Palk Strait",
26446   "\t11\t78\t80",
26447   "\t10\t77\t81",
26448   "\t9\t77\t81",
26449   "\t8\t77\t81",
26450   "\t7\t78\t80",
26451   "Pamlico Sound",
26452   "\t36\t-78\t-74",
26453   "\t35\t-78\t-74",
26454   "\t34\t-78\t-74",
26455   "\t33\t-77\t-75",
26456   "Peacock Sound",
26457   "\t-71\t-103\t-94",
26458   "\t-72\t-103\t-94",
26459   "\t-73\t-103\t-94",
26460   "\t-74\t-102\t-94",
26461   "Persian Gulf",
26462   "\t31\t46\t51",
26463   "\t30\t46\t51",
26464   "\t29\t46\t52",
26465   "\t28\t46\t57",
26466   "\t27\t47\t58",
26467   "\t26\t47\t58",
26468   "\t25\t48\t58",
26469   "\t24\t49\t57",
26470   "\t23\t49\t55",
26471   "\t22\t50\t53",
26472   "Philippine Sea",
26473   "\t36\t135\t139",
26474   "\t35\t135\t140",
26475   "\t34\t132\t141",
26476   "\t33\t130\t141",
26477   "\t32\t129\t141",
26478   "\t31\t129\t141",
26479   "\t30\t129\t142",
26480   "\t29\t128\t143",
26481   "\t28\t127\t143",
26482   "\t27\t126\t143",
26483   "\t26\t126\t143",
26484   "\t25\t120\t143",
26485   "\t24\t120\t143",
26486   "\t23\t119\t143",
26487   "\t22\t119\t144",
26488   "\t21\t119\t146",
26489   "\t20\t119\t146",
26490   "\t19\t120\t147",
26491   "\t18\t121\t147",
26492   "\t17\t120\t147",
26493   "\t16\t120\t147",
26494   "\t15\t120\t147",
26495   "\t14\t120\t147",
26496   "\t13\t120\t146",
26497   "\t12\t121\t146",
26498   "\t11\t123\t145",
26499   "\t10\t124\t144",
26500   "\t9\t124\t142",
26501   "\t8\t125\t141",
26502   "\t7\t125\t139",
26503   "\t6\t124\t137",
26504   "\t5\t124\t136",
26505   "\t4\t124\t134",
26506   "\t3\t124\t133",
26507   "\t2\t124\t131",
26508   "\t1\t127\t129",
26509   "Porpoise Bay",
26510   "\t-65\t125\t131",
26511   "\t-66\t125\t131",
26512   "\t-67\t125\t131",
26513   "\t-68\t126\t130",
26514   "Prince ALbert Sound",
26515   "\t71\t-118\t-110",
26516   "\t70\t-118\t-110",
26517   "\t69\t-118\t-110",
26518   "Prince of Wales Strait",
26519   "\t74\t-118\t-113",
26520   "\t73\t-121\t-113",
26521   "\t72\t-121\t-113",
26522   "\t71\t-121\t-115",
26523   "\t70\t-121\t-117",
26524   "Prince William Sound",
26525   "\t62\t-149\t-145",
26526   "\t61\t-149\t-144",
26527   "\t60\t-149\t-144",
26528   "\t59\t-149\t-144",
26529   "\t58\t-148\t-146",
26530   "Prydz Bay",
26531   "\t-66\t68\t75",
26532   "\t-67\t68\t80",
26533   "\t-68\t66\t80",
26534   "\t-69\t66\t80",
26535   "\t-70\t66\t78",
26536   "\t-71\t65\t74",
26537   "\t-72\t65\t72",
26538   "\t-73\t65\t71",
26539   "\t-74\t65\t68",
26540   "Puget Sound",
26541   "\t49\t-123\t-121",
26542   "\t48\t-124\t-121",
26543   "\t47\t-124\t-121",
26544   "\t46\t-124\t-121",
26545   "Qiongzhou Strait",
26546   "\t21\t108\t111",
26547   "\t20\t108\t111",
26548   "\t19\t108\t111",
26549   "\t18\t108\t110",
26550   "Queen Charlotte Sound",
26551   "\t54\t-130\t-128",
26552   "\t53\t-132\t-127",
26553   "\t52\t-132\t-126",
26554   "\t51\t-132\t-126",
26555   "\t50\t-132\t-126",
26556   "\t49\t-130\t-126",
26557   "Queen Charlotte Straight",
26558   "\t52\t-128\t-125",
26559   "\t51\t-128\t-123",
26560   "\t50\t-128\t-123",
26561   "\t49\t-128\t-123",
26562   "Ragay Gulf",
26563   "\t14\t121\t124",
26564   "\t13\t121\t124",
26565   "\t12\t121\t124",
26566   "\t11\t122\t124",
26567   "Red Sea",
26568   "\t29\t33\t36",
26569   "\t28\t32\t36",
26570   "\t27\t32\t37",
26571   "\t26\t32\t38",
26572   "\t25\t32\t39",
26573   "\t24\t33\t39",
26574   "\t23\t34\t40",
26575   "\t22\t34\t40",
26576   "\t21\t34\t41",
26577   "\t20\t35\t42",
26578   "\t19\t36\t42",
26579   "\t18\t36\t43",
26580   "\t17\t36\t43",
26581   "\t16\t37\t43",
26582   "\t15\t38\t44",
26583   "\t14\t38\t44",
26584   "\t13\t39\t44",
26585   "\t12\t40\t44",
26586   "\t11\t41\t44",
26587   "Richard Collinson Inlet",
26588   "\t74\t-115\t-113",
26589   "\t73\t-115\t-112",
26590   "\t72\t-115\t-112",
26591   "\t71\t-115\t-112",
26592   "Rio de la Plata",
26593   "\t-31\t-59\t-57",
26594   "\t-32\t-59\t-57",
26595   "\t-33\t-59\t-53",
26596   "\t-34\t-59\t-53",
26597   "\t-35\t-59\t-53",
26598   "\t-36\t-58\t-54",
26599   "\t-37\t-58\t-55",
26600   "Robeson Channel",
26601   "\t83\t-63\t-56",
26602   "\t82\t-63\t-55",
26603   "\t81\t-63\t-55",
26604   "\t80\t-62\t-55",
26605   "Ronne Entrance",
26606   "\t-70\t-76\t-74",
26607   "\t-71\t-76\t-72",
26608   "\t-72\t-76\t-72",
26609   "\t-73\t-76\t-72",
26610   "Ross Sea",
26611   "\t-70\t169\t180",
26612   "\t-71\t167\t180",
26613   "\t-72\t167\t180",
26614   "\t-73\t167\t180",
26615   "\t-74\t168\t180",
26616   "\t-75\t168\t180",
26617   "\t-76\t165\t180",
26618   "\t-77\t160\t180",
26619   "\t-78\t158\t180",
26620   "\t-79\t157\t180",
26621   "\t-80\t157\t180",
26622   "\t-81\t157\t180",
26623   "\t-82\t159\t180",
26624   "\t-83\t160\t180",
26625   "\t-84\t166\t180",
26626   "\t-85\t176\t180",
26627   "Ross Sea",
26628   "\t-70\t-180\t-101",
26629   "\t-71\t-180\t-101",
26630   "\t-72\t-180\t-101",
26631   "\t-73\t-180\t-101",
26632   "\t-74\t-180\t-107",
26633   "\t-75\t-180\t-130\t-125\t-113",
26634   "\t-76\t-180\t-135",
26635   "\t-77\t-180\t-144",
26636   "\t-78\t-180\t-147",
26637   "\t-79\t-180\t-147",
26638   "\t-80\t-180\t-147",
26639   "\t-81\t-180\t-147",
26640   "\t-82\t-180\t-149",
26641   "\t-83\t-180\t-152",
26642   "\t-84\t-180\t-155",
26643   "\t-85\t-180\t-155",
26644   "\t-86\t-159\t-155",
26645   "Salton Sea",
26646   "\t34\t-117\t-114",
26647   "\t33\t-117\t-114",
26648   "\t32\t-117\t-114",
26649   "Samar Sea",
26650   "\t14\t122\t124",
26651   "\t13\t122\t125",
26652   "\t12\t122\t126",
26653   "\t11\t122\t126",
26654   "\t10\t123\t126",
26655   "San Francisco Bay",
26656   "\t39\t-123\t-120",
26657   "\t38\t-123\t-120",
26658   "\t37\t-123\t-120",
26659   "\t36\t-123\t-121",
26660   "Sargasso Sea",
26661   "\t36\t-68\t-51",
26662   "\t35\t-69\t-50",
26663   "\t34\t-70\t-49",
26664   "\t33\t-70\t-49",
26665   "\t32\t-71\t-49",
26666   "\t31\t-71\t-49",
26667   "\t30\t-71\t-49",
26668   "\t29\t-71\t-49",
26669   "\t28\t-71\t-49",
26670   "\t27\t-71\t-49",
26671   "\t26\t-71\t-49",
26672   "\t25\t-71\t-49",
26673   "\t24\t-71\t-49",
26674   "\t23\t-70\t-49",
26675   "\t22\t-69\t-49",
26676   "\t21\t-68\t-49",
26677   "\t20\t-67\t-49",
26678   "\t19\t-64\t-50",
26679   "Savu Sea",
26680   "\t-7\t117\t126",
26681   "\t-8\t117\t126",
26682   "\t-9\t117\t126",
26683   "\t-10\t117\t125",
26684   "\t-11\t119\t124",
26685   "Scotia Sea",
26686   "\t-50\t-59\t-53",
26687   "\t-51\t-60\t-46",
26688   "\t-52\t-60\t-40",
26689   "\t-53\t-60\t-35",
26690   "\t-54\t-60\t-35",
26691   "\t-55\t-59\t-35",
26692   "\t-56\t-59\t-36",
26693   "\t-57\t-58\t-37",
26694   "\t-58\t-58\t-39",
26695   "\t-59\t-57\t-40",
26696   "\t-60\t-57\t-41",
26697   "\t-61\t-56\t-43",
26698   "\t-62\t-56\t-49",
26699   "Sea of Azov",
26700   "\t48\t36\t40",
26701   "\t47\t33\t40",
26702   "\t46\t33\t40",
26703   "\t45\t33\t39",
26704   "\t44\t33\t39",
26705   "Sea of Crete",
26706   "\t39\t22\t24",
26707   "\t38\t21\t25",
26708   "\t37\t21\t29",
26709   "\t36\t21\t29",
26710   "\t35\t22\t29",
26711   "\t34\t22\t28",
26712   "Sea of Japan",
26713   "\t52\t139\t143",
26714   "\t51\t139\t143",
26715   "\t50\t139\t143",
26716   "\t49\t138\t143",
26717   "\t48\t137\t143",
26718   "\t47\t137\t143",
26719   "\t46\t135\t143",
26720   "\t45\t134\t143",
26721   "\t44\t130\t143",
26722   "\t43\t129\t142",
26723   "\t42\t128\t142",
26724   "\t41\t127\t141",
26725   "\t40\t127\t141",
26726   "\t39\t127\t141",
26727   "\t38\t127\t141",
26728   "\t37\t127\t140",
26729   "\t36\t126\t139",
26730   "\t35\t125\t138",
26731   "\t34\t125\t137",
26732   "\t33\t125\t133",
26733   "\t32\t125\t131",
26734   "\t31\t126\t130",
26735   "Sea of Marmara",
26736   "\t42\t26\t30",
26737   "\t41\t25\t30",
26738   "\t40\t25\t30",
26739   "\t39\t25\t30",
26740   "Sea of Okhotsk",
26741   "\t60\t141\t156",
26742   "\t59\t139\t156",
26743   "\t58\t137\t157",
26744   "\t57\t137\t157",
26745   "\t56\t137\t157",
26746   "\t55\t136\t157",
26747   "\t54\t136\t157",
26748   "\t53\t136\t157",
26749   "\t52\t136\t139\t141\t158",
26750   "\t51\t142\t158",
26751   "\t50\t142\t158",
26752   "\t49\t141\t157",
26753   "\t48\t141\t156",
26754   "\t47\t141\t155",
26755   "\t46\t140\t154",
26756   "\t45\t140\t153",
26757   "\t44\t140\t151",
26758   "\t43\t141\t149",
26759   "\t42\t143\t148",
26760   "Selat Bali",
26761   "\t-7\t113\t116",
26762   "\t-8\t113\t116",
26763   "\t-9\t113\t116",
26764   "Selat Dampier",
26765   "\t1\t128\t132",
26766   "\t0\t128\t132",
26767   "\t-1\t128\t132",
26768   "\t-2\t130\t132",
26769   "Seno de Skyring",
26770   "\t-51\t-74\t-70",
26771   "\t-52\t-74\t-70",
26772   "\t-53\t-74\t-70",
26773   "\t-54\t-74\t-71",
26774   "Seno Otway",
26775   "\t-51\t-72\t-70",
26776   "\t-52\t-73\t-70",
26777   "\t-53\t-73\t-70",
26778   "\t-54\t-73\t-70",
26779   "Shark Bay",
26780   "\t-23\t112\t114",
26781   "\t-24\t112\t115",
26782   "\t-25\t112\t115",
26783   "\t-26\t112\t115",
26784   "\t-27\t112\t115",
26785   "Shelikhova Gulf",
26786   "\t63\t162\t166",
26787   "\t62\t155\t166",
26788   "\t61\t153\t166",
26789   "\t60\t153\t165",
26790   "\t59\t153\t164",
26791   "\t58\t153\t162",
26792   "\t57\t154\t160",
26793   "\t56\t155\t158",
26794   "Sherman Basin",
26795   "\t69\t-99\t-97",
26796   "\t68\t-99\t-96",
26797   "\t67\t-99\t-96",
26798   "\t66\t-99\t-96",
26799   "Sibuyan Sea",
26800   "\t14\t120\t123",
26801   "\t13\t120\t124",
26802   "\t12\t120\t124",
26803   "\t11\t120\t124",
26804   "\t10\t120\t124",
26805   "Skagerrak",
26806   "\t60\t8\t12",
26807   "\t59\t6\t12",
26808   "\t58\t6\t12",
26809   "\t57\t6\t12",
26810   "\t56\t6\t11",
26811   "\t55\t7\t9",
26812   "Smith Sound",
26813   "\t53\t-128\t-125",
26814   "\t52\t-129\t-125",
26815   "\t51\t-129\t-125",
26816   "\t50\t-129\t-125",
26817   "Sognefjorden",
26818   "\t62\t3\t8",
26819   "\t61\t3\t8",
26820   "\t60\t3\t8",
26821   "\t59\t4\t8",
26822   "Solomon Sea",
26823   "\t-3\t151\t155",
26824   "\t-4\t146\t155",
26825   "\t-5\t145\t157",
26826   "\t-6\t145\t160",
26827   "\t-7\t145\t161",
26828   "\t-8\t146\t162",
26829   "\t-9\t147\t163",
26830   "\t-10\t147\t163",
26831   "\t-11\t148\t163",
26832   "\t-12\t152\t162",
26833   "South China Sea",
26834   "\t24\t112\t121",
26835   "\t23\t112\t121",
26836   "\t22\t109\t122",
26837   "\t21\t108\t123",
26838   "\t20\t108\t123",
26839   "\t19\t107\t123",
26840   "\t18\t105\t123",
26841   "\t17\t105\t123",
26842   "\t16\t105\t121",
26843   "\t15\t106\t121",
26844   "\t14\t107\t121",
26845   "\t13\t108\t121",
26846   "\t12\t107\t121",
26847   "\t11\t104\t121",
26848   "\t10\t104\t120",
26849   "\t9\t103\t120",
26850   "\t8\t102\t119",
26851   "\t7\t101\t118",
26852   "\t6\t101\t117",
26853   "\t5\t101\t117",
26854   "\t4\t101\t117",
26855   "\t3\t102\t116",
26856   "\t2\t102\t114",
26857   "\t1\t101\t113",
26858   "\t0\t101\t112",
26859   "\t-1\t101\t111",
26860   "\t-2\t103\t111",
26861   "\t-3\t103\t111",
26862   "\t-4\t105\t107",
26863   "Southern Ocean",
26864   "\t-59\t-180\t180",
26865   "\t-60\t-180\t180",
26866   "\t-61\t-180\t180",
26867   "\t-62\t-180\t180",
26868   "\t-63\t-180\t180",
26869   "\t-64\t-180\t180",
26870   "\t-65\t-180\t91\t103\t180",
26871   "\t-66\t-180\t87\t110\t180",
26872   "\t-67\t-180\t52\t54\t85\t112\t180",
26873   "\t-68\t-180\t51\t54\t84\t112\t122\t141\t180",
26874   "\t-69\t-180\t-69\t-66\t44\t74\t80\t145\t180",
26875   "\t-70\t-180\t-73\t-64\t33\t154\t180",
26876   "\t-71\t-180\t-79\t-63\t33\t159\t180",
26877   "\t-72\t-180\t-85\t-62\t2\t24\t27\t161\t163\t166\t180",
26878   "\t-73\t-100\t-91",
26879   "St. Helena Bay",
26880   "\t-30\t16\t19",
26881   "\t-31\t16\t19",
26882   "\t-32\t16\t19",
26883   "\t-33\t16\t19",
26884   "St. Lawrence River",
26885   "\t51\t-67\t-63",
26886   "\t50\t-69\t-63",
26887   "\t49\t-72\t-63",
26888   "\t48\t-72\t-63",
26889   "\t47\t-74\t-66",
26890   "\t46\t-75\t-68",
26891   "\t45\t-75\t-69",
26892   "\t44\t-75\t-72",
26893   "Stettiner Haff",
26894   "\t55\t12\t14",
26895   "\t54\t12\t15",
26896   "\t53\t12\t15",
26897   "\t52\t12\t15",
26898   "Storfjorden",
26899   "\t79\t17\t22",
26900   "\t78\t16\t22",
26901   "\t77\t15\t22",
26902   "\t76\t15\t22",
26903   "\t75\t15\t19",
26904   "Strait of Belle Isle",
26905   "\t53\t-56\t-54",
26906   "\t52\t-58\t-54",
26907   "\t51\t-58\t-54",
26908   "\t50\t-58\t-54",
26909   "Strait of Georgia",
26910   "\t51\t-126\t-122",
26911   "\t50\t-126\t-121",
26912   "\t49\t-126\t-121",
26913   "\t48\t-126\t-121",
26914   "\t47\t-124\t-121",
26915   "Strait of Gibraltar",
26916   "\t37\t-7\t-4",
26917   "\t36\t-7\t-4",
26918   "\t35\t-7\t-4",
26919   "\t34\t-6\t-4",
26920   "Strait of Juan de Fuca",
26921   "\t49\t-125\t-121",
26922   "\t48\t-125\t-121",
26923   "\t47\t-125\t-121",
26924   "Strait of Malacca",
26925   "\t9\t97\t99",
26926   "\t8\t97\t100",
26927   "\t7\t95\t101",
26928   "\t6\t94\t101",
26929   "\t5\t94\t101",
26930   "\t4\t94\t102",
26931   "\t3\t96\t103",
26932   "\t2\t97\t104",
26933   "\t1\t98\t104",
26934   "\t0\t99\t104",
26935   "\t-1\t101\t104",
26936   "Strait of Singapore",
26937   "\t2\t102\t105",
26938   "\t1\t102\t105",
26939   "\t0\t102\t105",
26940   "Straits of Florida",
26941   "\t27\t-81\t-77",
26942   "\t26\t-82\t-77",
26943   "\t25\t-84\t-77",
26944   "\t24\t-84\t-77",
26945   "\t23\t-84\t-77",
26946   "\t22\t-84\t-78",
26947   "Sulu Sea",
26948   "\t13\t118\t122",
26949   "\t12\t118\t123",
26950   "\t11\t118\t123",
26951   "\t10\t117\t124",
26952   "\t9\t116\t124",
26953   "\t8\t115\t124",
26954   "\t7\t115\t124",
26955   "\t6\t115\t123",
26956   "\t5\t115\t123",
26957   "\t4\t116\t121",
26958   "Sulzberger Bay",
26959   "\t-75\t-153\t-144",
26960   "\t-76\t-159\t-144",
26961   "\t-77\t-159\t-144",
26962   "\t-78\t-159\t-144",
26963   "Surigao Strait",
26964   "\t11\t124\t126",
26965   "\t10\t124\t126",
26966   "\t9\t124\t126",
26967   "\t8\t124\t126",
26968   "Taiwan Strait",
26969   "\t26\t117\t122",
26970   "\t25\t116\t122",
26971   "\t24\t116\t122",
26972   "\t23\t116\t121",
26973   "\t22\t116\t121",
26974   "Tasman Sea",
26975   "\t-28\t152\t160",
26976   "\t-29\t152\t160",
26977   "\t-30\t151\t162",
26978   "\t-31\t150\t166",
26979   "\t-32\t150\t170",
26980   "\t-33\t149\t174",
26981   "\t-34\t149\t174",
26982   "\t-35\t148\t175",
26983   "\t-36\t148\t175",
26984   "\t-37\t147\t175",
26985   "\t-38\t146\t176",
26986   "\t-39\t146\t176",
26987   "\t-40\t146\t176",
26988   "\t-41\t146\t176",
26989   "\t-42\t145\t175",
26990   "\t-43\t145\t172",
26991   "\t-44\t145\t171",
26992   "\t-45\t147\t169",
26993   "\t-46\t150\t168",
26994   "\t-47\t152\t168",
26995   "\t-48\t155\t168",
26996   "\t-49\t158\t167",
26997   "\t-50\t160\t167",
26998   "\t-51\t163\t167",
26999   "Tatar Strait",
27000   "\t54\t139\t142",
27001   "\t53\t139\t142",
27002   "\t52\t139\t142",
27003   "\t51\t140\t142",
27004   "\t50\t140\t142",
27005   "Tayabas Bay",
27006   "\t14\t119\t123",
27007   "\t13\t119\t123",
27008   "\t12\t119\t123",
27009   "The North Western Passages",
27010   "\t81\t-101\t-95",
27011   "\t80\t-108\t-90\t-88\t-82",
27012   "\t79\t-114\t-80",
27013   "\t78\t-117\t-80",
27014   "\t77\t-120\t-80",
27015   "\t76\t-120\t-81",
27016   "\t75\t-120\t-78",
27017   "\t74\t-120\t-76",
27018   "\t73\t-106\t-76",
27019   "\t72\t-106\t-76",
27020   "\t71\t-118\t-116\t-106\t-83",
27021   "\t70\t-119\t-112\t-108\t-83",
27022   "\t69\t-119\t-85",
27023   "\t68\t-119\t-92",
27024   "\t67\t-118\t-92",
27025   "\t66\t-116\t-106\t-104\t-94",
27026   "\t65\t-97\t-94",
27027   "Timor Sea",
27028   "\t-7\t125\t131",
27029   "\t-8\t123\t131",
27030   "\t-9\t121\t132",
27031   "\t-10\t121\t133",
27032   "\t-11\t121\t133",
27033   "\t-12\t122\t133",
27034   "\t-13\t124\t133",
27035   "\t-14\t125\t131",
27036   "Torres Strait",
27037   "\t-8\t140\t144",
27038   "\t-9\t140\t144",
27039   "\t-10\t140\t144",
27040   "\t-11\t141\t143",
27041   "\t-12\t141\t143",
27042   "Trondheimsfjorden",
27043   "\t65\t10\t12",
27044   "\t64\t7\t12",
27045   "\t63\t7\t12",
27046   "\t62\t7\t12",
27047   "Tsugaru Strait",
27048   "\t42\t139\t142",
27049   "\t41\t139\t142",
27050   "\t40\t139\t142",
27051   "\t39\t139\t142",
27052   "Tyrrhenian Sea",
27053   "\t45\t8\t11",
27054   "\t44\t8\t11",
27055   "\t43\t8\t12",
27056   "\t42\t8\t14",
27057   "\t41\t8\t16",
27058   "\t40\t8\t17",
27059   "\t39\t7\t17",
27060   "\t38\t7\t17",
27061   "\t37\t7\t17",
27062   "\t36\t10\t14",
27063   "Uchiura Bay",
27064   "\t43\t139\t144",
27065   "\t42\t139\t144",
27066   "\t41\t139\t144",
27067   "\t40\t139\t143",
27068   "\t39\t140\t142",
27069   "Uda Bay",
27070   "\t57\t136\t139",
27071   "\t56\t134\t139",
27072   "\t55\t134\t139",
27073   "\t54\t134\t139",
27074   "\t53\t134\t139",
27075   "\t52\t135\t138",
27076   "Ungava Bay",
27077   "\t61\t-71\t-63",
27078   "\t60\t-71\t-63",
27079   "\t59\t-71\t-63",
27080   "\t58\t-71\t-64",
27081   "\t57\t-71\t-64",
27082   "\t56\t-70\t-66",
27083   "Uummannaq Fjord",
27084   "\t73\t-54\t-52",
27085   "\t72\t-55\t-50",
27086   "\t71\t-55\t-49",
27087   "\t70\t-55\t-49",
27088   "\t69\t-55\t-49",
27089   "Vestfjorden",
27090   "\t69\t12\t18",
27091   "\t68\t11\t18",
27092   "\t67\t11\t18",
27093   "\t66\t11\t17",
27094   "\t65\t12\t14",
27095   "Vil'kitskogo Strait",
27096   "\t79\t99\t106",
27097   "\t78\t99\t106",
27098   "\t77\t99\t106",
27099   "\t76\t99\t106",
27100   "\t75\t99\t101",
27101   "Vincennes Bay",
27102   "\t-65\t103\t111",
27103   "\t-66\t103\t111",
27104   "\t-67\t103\t111",
27105   "Visayan Sea",
27106   "\t13\t122\t124",
27107   "\t12\t121\t125",
27108   "\t11\t121\t125",
27109   "\t10\t121\t125",
27110   "\t9\t121\t125",
27111   "Viscount Melville Sound",
27112   "\t76\t-110\t-103",
27113   "\t75\t-115\t-103",
27114   "\t74\t-116\t-103",
27115   "\t73\t-116\t-103",
27116   "\t72\t-116\t-104",
27117   "\t71\t-114\t-107",
27118   "Waddenzee",
27119   "\t54\t3\t7",
27120   "\t53\t3\t7",
27121   "\t52\t3\t7",
27122   "\t51\t3\t6",
27123   "Wager Bay",
27124   "\t66\t-92\t-86",
27125   "\t65\t-92\t-86",
27126   "\t64\t-92\t-86",
27127   "Weddell Sea",
27128   "\t-70\t-62\t-9",
27129   "\t-71\t-63\t-9",
27130   "\t-72\t-63\t-9",
27131   "\t-73\t-64\t-10",
27132   "\t-74\t-66\t-13",
27133   "\t-75\t-78\t-14",
27134   "\t-76\t-84\t-17",
27135   "\t-77\t-84\t-25",
27136   "\t-78\t-84\t-22",
27137   "\t-79\t-84\t-22",
27138   "\t-80\t-82\t-22",
27139   "\t-81\t-79\t-23",
27140   "\t-82\t-70\t-36",
27141   "\t-83\t-66\t-50\t-48\t-42",
27142   "\t-84\t-62\t-57",
27143   "White Sea",
27144   "\t69\t37\t45",
27145   "\t68\t30\t33\t37\t45",
27146   "\t67\t30\t45",
27147   "\t66\t30\t45",
27148   "\t65\t31\t45",
27149   "\t64\t33\t41",
27150   "\t63\t33\t41",
27151   "\t62\t35\t38",
27152   "Wrigley Gulf",
27153   "\t-72\t-131\t-124",
27154   "\t-73\t-135\t-123",
27155   "\t-74\t-135\t-123",
27156   "\t-75\t-135\t-123",
27157   "Wynniat Bay",
27158   "\t73\t-112\t-109",
27159   "\t72\t-112\t-109",
27160   "\t71\t-112\t-109",
27161   "Yellow Sea",
27162   "\t41\t123\t125",
27163   "\t40\t120\t126",
27164   "\t39\t120\t126",
27165   "\t38\t119\t127",
27166   "\t37\t119\t127",
27167   "\t36\t118\t127",
27168   "\t35\t118\t127",
27169   "\t34\t118\t127",
27170   "\t33\t118\t127",
27171   "\t32\t119\t127",
27172   "\t31\t119\t125",
27173   "\t30\t120\t123",
27174   "Yellowstone Lake",
27175   "\t45\t-111\t-109",
27176   "\t44\t-111\t-109",
27177   "\t43\t-111\t-109",
27178   "Yenisey Gulf",
27179   "\t74\t77\t81",
27180   "\t73\t77\t83",
27181   "\t72\t77\t84",
27182   "\t71\t77\t84",
27183   "\t70\t79\t84",
27184   "\t69\t81\t84",
27185   "Yucatan Channel",
27186   "\t23\t-86\t-84",
27187   "\t22\t-88\t-83",
27188   "\t21\t-88\t-83",
27189   "\t20\t-88\t-83",
27190   NULL
27191 };
27192 
27193 
27194 //Not part of AutoDef or Cleanup
sqn_binary_search_on_uint4_list(Uint4Ptr list,Uint4 pos,Uint4 listlen)27195 static Uint4 sqn_binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
27196 {
27197    Uint4  L;
27198    Uint4  mid;
27199    Uint4  R;
27200 
27201    if (list == NULL || listlen == 0)
27202       return 0;
27203    L = 0;
27204    R = listlen - 1;
27205    while (L < R)
27206    {
27207       mid = (L+R)/2;
27208       if (list[mid + 1] <= pos)
27209       {
27210          L = mid + 1;
27211       } else
27212       {
27213          R = mid;
27214       }
27215    }
27216    return R;
27217 }
27218 
27219 
27220 //Not part of AutoDef or Cleanup
MapRowCoordsSpecial(SeqAlignPtr sap,Uint4 pos,Int4 row,Boolean is_left_end)27221 static Int4 MapRowCoordsSpecial(SeqAlignPtr sap, Uint4 pos, Int4 row, Boolean is_left_end)
27222 {
27223    DenseSegPtr  dsp;
27224    Int4         idx;
27225    Int4         offset;
27226    SAIndexPtr   saip;
27227    Int4         start;
27228 
27229    if (sap == NULL || row < 0)
27230       return -1;
27231    if (sap->saip == NULL)
27232       return -1;
27233    saip = (SAIndexPtr)sap->saip;
27234    dsp = (DenseSegPtr)sap->segs;
27235    start = sqn_binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
27236    offset = pos - saip->aligncoords[start];
27237    idx = (dsp->dim*start) + row - 1;
27238    if (dsp->starts[idx] == -1)
27239    {
27240       if (is_left_end)
27241       {
27242          /* round up */
27243          while (start < dsp->numseg) {
27244             idx = (dsp->dim*start) + row - 1;
27245             if (dsp->starts[idx] != -1)
27246                return (dsp->starts[idx]);
27247             start++;
27248          }
27249          return -2;
27250       }
27251       else
27252       {
27253          /* round down */
27254          while (start >= 0) {
27255             idx = (dsp->dim*start) + row - 1;
27256             if (dsp->starts[idx] != -1)
27257                return (dsp->starts[idx] + dsp->lens[start] - 1);
27258             start--;
27259          }
27260          return -2;
27261       }
27262    } else
27263    {
27264       idx = (dsp->dim*start) + row - 1;
27265       if (dsp->strands[idx] != Seq_strand_minus)
27266          return (dsp->starts[idx] + offset);
27267       else
27268          return (dsp->starts[idx] + dsp->lens[start] - 1 - offset);
27269    }
27270    return -1;
27271 }
27272 
27273 
27274 //Not part of AutoDef or Cleanup
MapBioseqToBioseqSpecial(SeqAlignPtr sap,Int4 begin,Int4 fin,Int4 pos,Boolean is_left_end)27275 static Int4 MapBioseqToBioseqSpecial(SeqAlignPtr sap, Int4 begin, Int4 fin, Int4 pos, Boolean is_left_end)
27276 {
27277    Int4  bspos;
27278    Int4  sapos;
27279    Int4  start1;
27280    Int4  start2;
27281    Int4  stop1;
27282    Int4  stop2;
27283 
27284    if (sap == NULL || sap->saip == NULL)
27285       return -2;
27286    AlnMgr2GetNthSeqRangeInSA(sap, begin, &start1, &stop1);
27287    AlnMgr2GetNthSeqRangeInSA(sap, fin, &start2, &stop2);
27288    /* check to see whether the position is outside the alignment */
27289    if (pos < start1)
27290       return (start2 - (start1 - pos));
27291    else if (pos > stop1)
27292       return (stop2 + (pos-stop1));
27293    sapos = AlnMgr2MapBioseqToSeqAlign(sap, pos, begin);
27294    bspos = MapRowCoordsSpecial(sap, sapos, fin, is_left_end);
27295    if (bspos >= 0)
27296       return bspos;
27297    else if (is_left_end)
27298       return (start2-1);
27299    else
27300       return (stop2+1);
27301 }
27302 
27303 //Not part of AutoDef or Cleanup
27304 /* This function adjusts the endpoints of a location, as long as the
27305  * endpoints are in the area represented by the alignment.
27306  * When we are adjusting locations for an alignment of a part, we will
27307  * be looking at all features indexed on the main segment, but we only
27308  * want to adjust feature endpoints located on the segment that we are
27309  * updating.
27310  */
AdjustEndpoint(SeqAlignPtr salp,SeqLocPtr slp,Int4 max_length,Int4 begin,Int4 fin,Int4 endpoint,Boolean is_left_end)27311 static Int4 AdjustEndpoint
27312 (SeqAlignPtr salp,
27313  SeqLocPtr   slp,
27314  Int4        max_length,
27315  Int4        begin,
27316  Int4        fin,
27317  Int4        endpoint,
27318  Boolean     is_left_end)
27319 {
27320   BioseqPtr            slp_bsp, parent_bsp, old_bsp;
27321   SeqMgrSegmentContext segcontext;
27322   SeqIdPtr             old_sip;
27323   Int4                 pt;
27324 
27325   if (slp == NULL || salp == NULL)
27326   {
27327     return endpoint;
27328   }
27329 
27330   old_sip = AlnMgr2GetNthSeqIdPtr (salp, begin);
27331   old_bsp = BioseqFind (old_sip);
27332 
27333   parent_bsp = SeqMgrGetParentOfPart (old_bsp, &segcontext);
27334 
27335   slp_bsp = BioseqFind (SeqLocId (slp));
27336   if (slp_bsp == old_bsp
27337       || (slp_bsp == parent_bsp
27338           && endpoint >= segcontext.cumOffset + segcontext.from
27339           && endpoint < segcontext.cumOffset + segcontext.to))
27340   {
27341     if (slp_bsp == parent_bsp)
27342     {
27343       endpoint -= segcontext.cumOffset + segcontext.from;
27344     }
27345     pt = MapBioseqToBioseqSpecial (salp, begin, fin, endpoint, is_left_end);
27346     if (pt < 0) {
27347       pt = 0;
27348     } else if (pt >= max_length) {
27349       pt = max_length - 1;
27350     }
27351     if (slp_bsp == parent_bsp)
27352     {
27353       pt += segcontext.cumOffset + segcontext.from;
27354     }
27355   }
27356   else
27357   {
27358     pt = endpoint;
27359   }
27360 
27361   return pt;
27362 }
27363 
27364 
27365 //Not part of AutoDef or Cleanup
ReplaceLocation(SeqAlignPtr salp,SeqLocPtr slp,Int4 length,Int4 begin,Int4 fin)27366 static void ReplaceLocation (SeqAlignPtr salp, SeqLocPtr slp, Int4 length, Int4 begin, Int4 fin)
27367 
27368 {
27369   PackSeqPntPtr  psp;
27370   SeqIntPtr      sinp;
27371   SeqPntPtr      spp;
27372   Uint1          used;
27373 
27374   if (slp == NULL) return;
27375   switch (slp->choice) {
27376     case SEQLOC_INT :
27377       sinp = (SeqIntPtr) slp->data.ptrvalue;
27378       if (sinp != NULL) {
27379         sinp->from = AdjustEndpoint (salp, slp, length, begin, fin,
27380                                      sinp->from, TRUE);
27381         sinp->to = AdjustEndpoint (salp, slp, length, begin, fin,
27382                                    sinp->to, FALSE);
27383       }
27384       break;
27385     case SEQLOC_PNT :
27386       spp = (SeqPntPtr) slp->data.ptrvalue;
27387       if (spp != NULL) {
27388         spp->point = AdjustEndpoint (salp, slp, length, begin, fin,
27389                                      spp->point, TRUE);
27390       }
27391       break;
27392     case SEQLOC_PACKED_PNT :
27393       psp = (PackSeqPntPtr) slp->data.ptrvalue;
27394       if (psp != NULL) {
27395         for (used = 0; used < psp->used; used++) {
27396           psp->pnts [used] = AdjustEndpoint (salp, slp, length, begin, fin,
27397                                              psp->pnts [used], TRUE);
27398         }
27399       }
27400       break;
27401     default :
27402       break;
27403   }
27404 }
27405 
27406 
27407 //Not part of AutoDef or Cleanup
27408 /* this function iterates through the pieces of a complex location
27409  * and calls ReplaceLocation for each one.  ReplaceLocation will only
27410  * act on SEQLOC_INT, SEQLOC_PNT, and SEQLOC_PACKED_PNT and will ignore
27411  * other types.
27412  */
27413 NLM_EXTERN void
ReplaceComplexLocation(SeqLocPtr slp,SeqAlignPtr salp,Int4 new_len,Int4 begin,Int4 fin)27414 ReplaceComplexLocation
27415 (SeqLocPtr   slp,
27416  SeqAlignPtr salp,
27417  Int4        new_len,
27418  Int4        begin,
27419  Int4        fin)
27420 {
27421   SeqLocPtr subslp;
27422 
27423   if (slp == NULL || salp == NULL)
27424   {
27425     return;
27426   }
27427 
27428   subslp = SeqLocFindNext (slp, NULL);
27429   while (subslp != NULL) {
27430     ReplaceLocation (salp, subslp, new_len, begin, fin);
27431     subslp = SeqLocFindNext (slp, subslp);
27432   }
27433 }
27434 
27435 
27436 //Not part of AutoDef or Cleanup
UpdateOneFeatureForSequenceReplace(SeqFeatPtr sfp,SeqAlignPtr salp,BioseqPtr oldbsp,Int4 new_len)27437 static void UpdateOneFeatureForSequenceReplace
27438 (SeqFeatPtr  sfp,
27439  SeqAlignPtr salp,
27440  BioseqPtr   oldbsp,
27441  Int4        new_len)
27442 {
27443   CodeBreakPtr cbp;
27444   CdRegionPtr  crp;
27445   RnaRefPtr    rrp;
27446   tRNAPtr      trp;
27447 
27448   if (sfp == NULL || salp == NULL)
27449   {
27450     return;
27451   }
27452 
27453   ReplaceComplexLocation (sfp->location, salp, new_len, 1, 2);
27454 
27455   switch (sfp->data.choice) {
27456     case SEQFEAT_CDREGION :
27457       crp = (CdRegionPtr) sfp->data.value.ptrvalue;
27458       if (crp != NULL)
27459       {
27460         for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next)
27461         {
27462           ReplaceComplexLocation (cbp->loc, salp, new_len, 1, 2);
27463         }
27464       }
27465       break;
27466     case SEQFEAT_RNA :
27467       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
27468       if (rrp != NULL && rrp->ext.choice == 2) {
27469         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
27470         if (trp != NULL && trp->anticodon != NULL) {
27471           ReplaceComplexLocation (trp->anticodon, salp, new_len, 1, 2);
27472         }
27473       }
27474       break;
27475     default :
27476       break;
27477   }
27478 }
27479 
27480 
27481 //Not part of AutoDef or Cleanup
UpdateLocationsForSequenceReplace(SeqAlignPtr salp,BioseqPtr oldbsp,BioseqPtr newbsp)27482 static void UpdateLocationsForSequenceReplace
27483 (SeqAlignPtr salp,
27484  BioseqPtr oldbsp,
27485  BioseqPtr newbsp)
27486 {
27487   BioseqPtr    parentbsp;
27488   SeqMgrFeatContext  context;
27489   SeqMgrSegmentContext segcontext;
27490   SeqFeatPtr           sfp;
27491 
27492   if (salp == NULL || oldbsp == NULL || newbsp == NULL)
27493   {
27494     return;
27495   }
27496 
27497   /* if this sequence is a part, the features will be indexed on
27498    * the parent.
27499    */
27500   parentbsp = SeqMgrGetParentOfPart (oldbsp, &segcontext);
27501   if (parentbsp == NULL)
27502   {
27503     sfp = SeqMgrGetNextFeature (oldbsp, NULL, 0, 0, &context);
27504     while (sfp != NULL)
27505     {
27506       UpdateOneFeatureForSequenceReplace (sfp, salp, oldbsp, newbsp->length);
27507       sfp = SeqMgrGetNextFeature (oldbsp, sfp, 0, 0, &context);
27508 
27509     }
27510   }
27511   else
27512   {
27513     sfp = SeqMgrGetNextFeature (parentbsp, NULL, 0, 0, &context);
27514     while (sfp != NULL)
27515     {
27516       UpdateOneFeatureForSequenceReplace (sfp, salp, oldbsp, newbsp->length);
27517       sfp = SeqMgrGetNextFeature (parentbsp, sfp, 0, 0, &context);
27518     }
27519   }
27520 }
27521 
27522 
27523 //Not part of AutoDef or Cleanup
27524 NLM_EXTERN void
ReplaceOneSequence(SeqAlignPtr salp,BioseqPtr oldbsp,BioseqPtr newbsp)27525 ReplaceOneSequence
27526 (SeqAlignPtr salp,
27527  BioseqPtr oldbsp,
27528  BioseqPtr newbsp)
27529 {
27530   SeqDataPtr         bs;
27531   Int4               len, len_change;
27532   Uint1              seq_data_type, seq_ext_type;
27533   Pointer            seq_ext;
27534   Uint1              repr;
27535   BioseqPtr          parent_bsp;
27536   SeqMgrSegmentContext context;
27537 
27538   if (oldbsp == NULL || newbsp == NULL)
27539   {
27540     return;
27541   }
27542 
27543   UpdateLocationsForSequenceReplace (salp, oldbsp, newbsp);
27544   len_change = newbsp->length - oldbsp->length;
27545 
27546   /* switch bioseqs to finish update */
27547 
27548   bs = oldbsp->seq_data;
27549   oldbsp->seq_data = newbsp->seq_data;
27550   newbsp->seq_data = bs;
27551   len = oldbsp->length;
27552   oldbsp->length = newbsp->length;
27553   newbsp->length = len;
27554   seq_data_type = oldbsp->seq_data_type;
27555   oldbsp->seq_data_type = newbsp->seq_data_type;
27556   newbsp->seq_data_type = seq_data_type;
27557   /* also move seq_ext, for delta sequences */
27558   seq_ext_type = oldbsp->seq_ext_type;
27559   seq_ext = oldbsp->seq_ext;
27560   oldbsp->seq_ext_type = newbsp->seq_ext_type;
27561   oldbsp->seq_ext = newbsp->seq_ext;
27562   newbsp->seq_ext_type = seq_ext_type;
27563   newbsp->seq_ext = seq_ext;
27564 
27565   /* swap repr */
27566   repr = oldbsp->repr;
27567   oldbsp->repr = newbsp->repr;
27568   newbsp->repr = repr;
27569 
27570   /* if this was part of a segmented set, update the parent length */
27571   parent_bsp = SeqMgrGetParentOfPart (oldbsp, &context);
27572   if (parent_bsp != NULL)
27573   {
27574     parent_bsp->length += len_change;
27575   }
27576 }
27577 
27578 
27579 //Not part of AutoDef or Cleanup
AreSequenceResiduesIdentical(BioseqPtr bsp1,BioseqPtr bsp2)27580 NLM_EXTERN Boolean AreSequenceResiduesIdentical (BioseqPtr bsp1, BioseqPtr bsp2)
27581 {
27582   SeqPortPtr    spp1, spp2;
27583   Uint1         seqcode;
27584   Int4          buf_len = 255;
27585   Char          buf1[255], buf2[255];
27586   Int4          ctr1, ctr2, offset;
27587   Boolean       rval;
27588 
27589   if (bsp1 == NULL && bsp2 == NULL)
27590   {
27591     return TRUE;
27592   }
27593   else if (bsp1 == NULL || bsp2 == NULL)
27594   {
27595     return FALSE;
27596   }
27597   else if (bsp1->length != bsp2->length)
27598   {
27599     return FALSE;
27600   }
27601   else if (ISA_na (bsp1->mol) && ! ISA_na (bsp2->mol))
27602   {
27603     return FALSE;
27604   }
27605   else if (!ISA_na (bsp1->mol) && ISA_na (bsp2->mol))
27606   {
27607     return FALSE;
27608   }
27609 
27610   if (ISA_na (bsp1->mol))
27611   {
27612     seqcode = Seq_code_iupacna;
27613   }
27614   else
27615   {
27616     seqcode = Seq_code_iupacaa;
27617   }
27618 
27619 
27620   spp1 = SeqPortNew (bsp1, 0, bsp1->length - 1, Seq_strand_plus, seqcode);
27621   spp2 = SeqPortNew (bsp2, 0, bsp2->length - 1, Seq_strand_plus, seqcode);
27622 
27623   ctr1 = SeqPortRead (spp1, (Uint1Ptr)buf1, buf_len - 1);
27624   ctr2 = SeqPortRead (spp2, (Uint1Ptr)buf2, buf_len - 1);
27625   buf1 [ctr1] = 0;
27626   buf2 [ctr2] = 0;
27627   offset = ctr1;
27628 
27629   while (ctr1 == ctr2 && StringCmp (buf1, buf2) == 0 && offset < bsp1->length)
27630   {
27631     ctr1 = SeqPortRead (spp1, (Uint1Ptr)buf1, buf_len - 1);
27632     ctr2 = SeqPortRead (spp2, (Uint1Ptr)buf2, buf_len - 1);
27633     buf1 [ctr1] = 0;
27634     buf2 [ctr2] = 0;
27635     offset += ctr1;
27636   }
27637 
27638   if (ctr1 != ctr2 || StringCmp (buf1, buf2) != 0 || offset < bsp1->length)
27639   {
27640     rval = FALSE;
27641   }
27642   else
27643   {
27644     rval = TRUE;
27645   }
27646 
27647   spp1 = SeqPortFree (spp1);
27648   spp2 = SeqPortFree (spp2);
27649 
27650   return rval;
27651 }
27652 
27653 //Not part of AutoDef or Cleanup
FindBestCitSubCallback(GatherContextPtr gcp)27654 static Boolean FindBestCitSubCallback (GatherContextPtr gcp)
27655 
27656 {
27657   CitSubPtr   best;
27658   CitSubPtr   PNTR bestp;
27659   CitSubPtr   csp;
27660   PubdescPtr  pdp;
27661   ValNodePtr  sdp;
27662   ValNodePtr  vnp;
27663 
27664   if (gcp == NULL) return TRUE;
27665   bestp = (CitSubPtr PNTR) gcp->userdata;
27666   if (bestp == NULL) return TRUE;
27667   if (gcp->thistype != OBJ_SEQDESC) return TRUE;
27668   sdp = (ValNodePtr) gcp->thisitem;
27669   if (sdp == NULL || sdp->choice != Seq_descr_pub) return TRUE;
27670   pdp = (PubdescPtr) sdp->data.ptrvalue;
27671   if (pdp == NULL) return TRUE;
27672   vnp = pdp->pub;
27673   if (vnp == NULL || vnp->choice != PUB_Sub) return TRUE;
27674   csp = (CitSubPtr) vnp->data.ptrvalue;
27675   if (csp == NULL) return TRUE;
27676   if (*bestp == NULL) {
27677     *bestp = csp;
27678     return TRUE;
27679   }
27680   best = *bestp;
27681   if (DateMatch (best->date, csp->date, FALSE) == -1) {
27682     *bestp = csp;
27683     return TRUE;
27684   }
27685   return TRUE;
27686 }
27687 
27688 //Not part of AutoDef or Cleanup
FindBestCitSubForSeqEntry(SeqEntryPtr sep)27689 static CitSubPtr FindBestCitSubForSeqEntry (SeqEntryPtr sep)
27690 {
27691   CitSubPtr    best = NULL;
27692   GatherScope  gs;
27693 
27694   MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
27695   gs.seglevels = 1;
27696   gs.get_feats_location = FALSE;
27697   MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
27698   gs.ignore[OBJ_BIOSEQ] = FALSE;
27699   gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
27700   gs.ignore[OBJ_SEQDESC] = FALSE;
27701   gs.scope = sep;
27702   GatherSeqEntry (sep, (Pointer) (&best), FindBestCitSubCallback, &gs);
27703   return best;
27704 }
27705 
27706 //Not part of AutoDef or Cleanup
CreateUpdateCitSubFromBestTemplate(SeqEntryPtr top_sep,SeqEntryPtr upd_sep,CharPtr update_txt)27707 NLM_EXTERN ValNodePtr CreateUpdateCitSubFromBestTemplate (
27708   SeqEntryPtr top_sep,
27709   SeqEntryPtr upd_sep,
27710   CharPtr     update_txt
27711 )
27712 {
27713   CitSubPtr    best;
27714   CitSubPtr    csp;
27715   DatePtr      dp;
27716   PubdescPtr   pdp;
27717   ValNodePtr   sdp;
27718   ValNodePtr   vnp;
27719   ValNodePtr   err_list = NULL;
27720 
27721   if (upd_sep == NULL) {
27722     return NULL;
27723   }
27724   best = FindBestCitSubForSeqEntry (upd_sep);
27725   if (best == NULL) {
27726     best = FindBestCitSubForSeqEntry (top_sep);
27727   }
27728   if (best == NULL) {
27729     ValNodeAddPointer (&err_list, 0, "There is no earlier cit-sub template");
27730     return err_list;
27731   }
27732   dp = DateCurr ();
27733   if (dp != NULL) {
27734     if (DateMatch (best->date, dp, FALSE) == 0) {
27735       DateFree (dp);
27736 	    dp = NULL;
27737       if (upd_sep == top_sep
27738           && StringICmp (best->descr, update_txt) == 0)
27739       {
27740         ValNodeAddPointer (&err_list, 0, "There already exists an update on today's date");
27741         return err_list;
27742       } else if (best->descr == NULL) {
27743         best->descr = StringSave (update_txt);
27744         ValNodeAddPointer (&err_list, 1, "Adding update indication to existing cit-sub");
27745         return err_list;
27746       }
27747     }
27748     DateFree (dp);
27749   }
27750   sdp = CreateNewDescriptor (upd_sep, Seq_descr_pub);
27751   if (sdp == NULL) return FALSE;
27752   pdp = PubdescNew ();
27753   if (pdp == NULL) return FALSE;
27754   sdp->data.ptrvalue = (Pointer) pdp;
27755   vnp = ValNodeNew (NULL);
27756   csp = AsnIoMemCopy ((Pointer) best,
27757                       (AsnReadFunc) CitSubAsnRead,
27758                       (AsnWriteFunc) CitSubAsnWrite);
27759   pdp->pub = vnp;
27760   vnp->choice = PUB_Sub;
27761   vnp->data.ptrvalue = csp;
27762   csp->date = DateFree (csp->date);
27763   csp->date = DateCurr ();
27764   if (!StringHasNoText (update_txt)) {
27765     csp->descr = MemFree (csp->descr);
27766     csp->descr = StringSave (update_txt);
27767   }
27768 
27769   if (top_sep == upd_sep)
27770   {
27771     ValNodeAddPointer (&err_list, 1, "The update Cit-sub has been placed on the top Seq-entry");
27772   }
27773   return err_list;
27774 }
27775 
27776 
27777 //Not part of AutoDef or Cleanup
27778 CharPtr kSubmitterUpdateText = "Sequence update by submitter";
27779 
AddCitSubToUpdatedSequence(BioseqPtr upd_bsp,Uint2 input_entityID,CharPtr update_txt)27780 NLM_EXTERN void AddCitSubToUpdatedSequence (BioseqPtr upd_bsp, Uint2 input_entityID, CharPtr update_txt)
27781 {
27782   SeqEntryPtr top_sep, upd_sep;
27783   ValNodePtr  msg_list;
27784 
27785   upd_sep = GetBestTopParentForData (input_entityID, upd_bsp);
27786   if (upd_sep == NULL) return;
27787   top_sep = GetTopSeqEntryForEntityID ( input_entityID);
27788   if (top_sep == NULL) return;
27789   msg_list = CreateUpdateCitSubFromBestTemplate (top_sep, upd_sep, update_txt);
27790   if (msg_list != NULL) {
27791     if (msg_list->choice == 0) {
27792       Message (MSG_ERROR, (CharPtr) msg_list->data.ptrvalue);
27793     } else {
27794       Message (MSG_OK, (CharPtr) msg_list->data.ptrvalue);
27795     }
27796     msg_list = ValNodeFree (msg_list);
27797   }
27798 }
27799 
27800 
27801 //Not used for Autodef or cleanup
ListPhrapGraphsCallback(SeqGraphPtr sgp,Pointer userdata)27802 static void ListPhrapGraphsCallback (SeqGraphPtr sgp, Pointer userdata)
27803 {
27804   ValNodePtr PNTR vnpp;
27805 
27806   if (sgp == NULL || userdata == NULL) return;
27807   if (StringICmp (sgp->title, "Phrap Quality") == 0)
27808   {
27809     vnpp = (ValNodePtr PNTR) userdata;
27810     ValNodeAddPointer (vnpp, 0, sgp);
27811   }
27812 }
27813 
27814 //Not used for Autodef or cleanup
RemoveQualityScores(BioseqPtr bsp,FILE * log_fp,BoolPtr data_in_log)27815 NLM_EXTERN void RemoveQualityScores
27816 (BioseqPtr bsp,
27817  FILE      *log_fp,
27818  BoolPtr   data_in_log)
27819 
27820 {
27821   ValNodePtr score_list = NULL, vnp;
27822   Char          acc_str [256];
27823   SeqGraphPtr   sgp;
27824 
27825   if (bsp == NULL) return;
27826 
27827   VisitGraphsOnBsp (bsp, &score_list, ListPhrapGraphsCallback);
27828   if (score_list == NULL) return;
27829 
27830   if (log_fp != NULL && data_in_log != NULL)
27831   {
27832     SeqIdWrite (bsp->id, acc_str, PRINTID_REPORT, sizeof (acc_str));
27833     fprintf (log_fp, "Quality scores cleared for %s\n", acc_str);
27834     *data_in_log = TRUE;
27835   }
27836 
27837   for (vnp = score_list; vnp != NULL; vnp = vnp->next)
27838   {
27839     sgp = vnp->data.ptrvalue;
27840     if (sgp != NULL)
27841     {
27842       sgp->idx.deleteme = TRUE;
27843     }
27844   }
27845 
27846   score_list = ValNodeFree (score_list);
27847   DeleteMarkedObjects (0, OBJ_BIOSEQ, (Pointer) bsp);
27848 }
27849 
27850 
27851 //Not used for Autodef or cleanup
GetNextCharacterFromFile(FILE * fp,BoolPtr pIsASN)27852 static Char GetNextCharacterFromFile (FILE *fp, BoolPtr pIsASN)
27853 {
27854   FileCache    fc;
27855   CharPtr      str;
27856   Char         special_symbol;
27857   Char         line [128];
27858   Int4         pos;
27859 
27860   /* look ahead to see what character caused inability to interpret line */
27861   FileCacheSetup (&fc, fp);
27862   /* pos = FileCacheTell (&fc); */
27863   str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
27864   if (str != NULL && StringDoesHaveText (str)) {
27865     TrimSpacesAroundString (str);
27866   }
27867   special_symbol = line [0];
27868   if (pIsASN != NULL)
27869   {
27870     if (StringStr (line, "::=") != NULL)
27871     {
27872       *pIsASN = TRUE;
27873     }
27874     else
27875     {
27876       *pIsASN = FALSE;
27877     }
27878   }
27879   /* seek to start of next line after one that could not be interpreted */
27880   pos = FileCacheTell (&fc);
27881   FileCacheSetup (&fc, fp);
27882   FileCacheSeek (&fc, pos);
27883   fseek (fp, pos, SEEK_SET);
27884   return special_symbol;
27885 }
27886 
27887 //Not used for Autodef or cleanup
ReplaceFakeIDWithIDFromTitle(BioseqPtr bsp)27888 NLM_EXTERN void ReplaceFakeIDWithIDFromTitle (BioseqPtr bsp)
27889 {
27890   SeqDescrPtr sdp;
27891   CharPtr     title_txt, new_id_str;
27892   Int4        id_len;
27893   Boolean     remove_punct = FALSE;
27894 
27895   if (bsp == NULL)
27896   {
27897     return;
27898   }
27899 
27900   bsp->id = SeqIdFree (bsp->id);
27901 
27902   sdp = bsp->descr;
27903   while (sdp != NULL && sdp->choice != Seq_descr_title)
27904   {
27905     sdp = sdp->next;
27906   }
27907   if (sdp != NULL && !StringHasNoText (sdp->data.ptrvalue))
27908   {
27909     title_txt = sdp->data.ptrvalue;
27910     /* skip any leading spaces */
27911     title_txt += StringSpn (title_txt, " \t");
27912     /* look for local IDs surrounded by quotes - the real way to have an ID with a space */
27913     if (*title_txt == '\'')
27914     {
27915       title_txt ++;
27916       id_len = StringCSpn (title_txt, "\'");
27917       if (title_txt [id_len] == '\'')
27918       {
27919         remove_punct = TRUE;
27920       }
27921       else
27922       {
27923         id_len = 0;
27924       }
27925     }
27926     else if (*title_txt == '\"')
27927     {
27928       title_txt ++;
27929       id_len = StringCSpn (title_txt, "\"");
27930       if (title_txt [id_len] == '\"')
27931       {
27932         remove_punct = TRUE;
27933       }
27934       else
27935       {
27936         id_len = 0;
27937       }
27938     }
27939     else
27940     {
27941       id_len = StringCSpn (title_txt, " \t");
27942     }
27943 
27944     if (id_len > 0)
27945     {
27946       new_id_str = (CharPtr) MemNew ((id_len + 1) * sizeof (Char));
27947       if (new_id_str != NULL)
27948       {
27949         StringNCpy (new_id_str, title_txt, id_len);
27950         new_id_str [id_len] = 0;
27951         bsp->id = MakeSeqID (new_id_str);
27952         new_id_str = MemFree (new_id_str);
27953         /* remove id from title */
27954         title_txt += id_len;
27955         if (remove_punct)
27956         {
27957           title_txt ++;
27958         }
27959         title_txt += StringSpn (title_txt, " \t");
27960         title_txt = StringSave (title_txt);
27961         sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
27962         sdp->data.ptrvalue = title_txt;
27963       }
27964     }
27965   }
27966 }
27967 
27968 //Not used for Autodef or cleanup
PutDeflineIDBackInTitle(BioseqPtr bsp)27969 static void PutDeflineIDBackInTitle (BioseqPtr bsp)
27970 {
27971   SeqDescrPtr sdp;
27972   CharPtr     id_txt;
27973   CharPtr     title_txt;
27974 
27975   if (bsp == NULL || bsp->id == NULL)
27976   {
27977     return;
27978   }
27979 
27980   sdp = bsp->descr;
27981   while (sdp != NULL && sdp->choice != Seq_descr_title)
27982   {
27983     sdp = sdp->next;
27984   }
27985   if (sdp == NULL)
27986   {
27987     sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_title);
27988   }
27989   if (sdp == NULL)
27990   {
27991     return;
27992   }
27993 
27994   id_txt = SeqIdWholeLabel (bsp->id, PRINTID_REPORT);
27995 
27996   if (StringHasNoText (sdp->data.ptrvalue))
27997   {
27998     sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
27999     sdp->data.ptrvalue = id_txt;
28000   }
28001   else
28002   {
28003     title_txt = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_txt) + StringLen (sdp->data.ptrvalue) + 2));
28004     StringCpy (title_txt, id_txt);
28005     StringCat (title_txt, " ");
28006     StringCat (title_txt, sdp->data.ptrvalue);
28007     sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
28008     sdp->data.ptrvalue = title_txt;
28009     id_txt = MemFree (id_txt);
28010   }
28011 
28012   bsp->id = SeqIdFree (bsp->id);
28013 }
28014 
28015 
28016 //Not used for Autodef or cleanup
28017 static SeqEntryPtr
ImportOneNucBioseq(FILE * fp,BoolPtr this_chars_stripped,Boolean parse_id,CharPtr supplied_id_txt,CharPtr lastchar,BoolPtr read_from_delta,BoolPtr isAsn,ValNodePtr PNTR err_msg_list)28018 ImportOneNucBioseq
28019 (FILE   *fp,
28020  BoolPtr this_chars_stripped,
28021  Boolean parse_id,
28022  CharPtr supplied_id_txt,
28023  CharPtr lastchar,
28024  BoolPtr read_from_delta,
28025  BoolPtr isAsn,
28026  ValNodePtr PNTR err_msg_list)
28027 {
28028   SeqEntryPtr sep = NULL;
28029   BioseqPtr   bsp = NULL;
28030   ErrSev      oldsev;
28031 
28032   if (feof (fp)) {
28033     *lastchar = 0;
28034     return NULL;
28035   }
28036   oldsev = ErrSetMessageLevel (SEV_MAX);
28037   /* first, try to read as delta sequence */
28038   bsp = ReadDeltaFastaEx (fp, NULL, this_chars_stripped);
28039   ErrSetMessageLevel (oldsev);
28040   if (bsp == NULL)
28041   {
28042     *read_from_delta = FALSE;
28043     /* if failed, try regular FASTA */
28044     /* note - we pass in FALSE for parse_id in SequinFastaToSeqEntryEx
28045      * because we do not want to use Sequin's auto-generated sequence IDs.
28046      * We then parse the sequence ID from the title ourselves using
28047      * ReplaceFakeIDWithIDFromTitle if parse_id is TRUE, or leave the ID
28048      * as blank to force the user to select a real ID later.
28049      */
28050 
28051     bsp = ReadFastaOnly (fp,
28052                          TRUE, FALSE,
28053                          this_chars_stripped,
28054                          lastchar);
28055     if (bsp == NULL)
28056     {
28057 /*
28058       if (*lastchar == '[')
28059       {
28060         sep = ReadOneSegSet (fp, parse_id, err_msg_list, this_chars_stripped);
28061       }
28062       else  */
28063       if (*lastchar == 0)
28064       {
28065         *lastchar = GetNextCharacterFromFile(fp, isAsn);
28066       }
28067     }
28068     else
28069     {
28070       if (parse_id)
28071       {
28072         ReplaceFakeIDWithIDFromTitle (bsp);
28073       }
28074       else
28075       {
28076         bsp->id = SeqIdFree (bsp->id);
28077         if (!StringHasNoText (supplied_id_txt))
28078         {
28079         	bsp->id = MakeSeqID (supplied_id_txt);
28080         }
28081       }
28082       sep = SeqMgrGetSeqEntryForData (bsp);
28083       SeqEntryPack (sep);
28084     }
28085   }
28086   else
28087   {
28088     /* successfully read delta sequence */
28089     *read_from_delta = TRUE;
28090 
28091     if (!parse_id)
28092     {
28093       PutDeflineIDBackInTitle (bsp);
28094       if (!StringHasNoText (supplied_id_txt))
28095       {
28096         bsp->id = MakeSeqID (supplied_id_txt);
28097       }
28098     }
28099     sep = SeqMgrGetSeqEntryForData (bsp);
28100   }
28101 
28102   return sep;
28103 }
28104 
28105 //Not used for Autodef or cleanup
HasGapID(SeqEntryPtr sep)28106 static Boolean HasGapID (SeqEntryPtr sep)
28107 {
28108   BioseqPtr bsp;
28109   Char      id_str [128];
28110   Int4      j;
28111 
28112   if (sep == NULL || ! IS_Bioseq (sep) || (bsp = sep->data.ptrvalue) == NULL)
28113   {
28114     return FALSE;
28115   }
28116 
28117   SeqIdWrite (bsp->id, id_str, PRINTID_REPORT, sizeof (id_str));
28118 
28119   if (id_str [0] != '?')
28120   {
28121     return FALSE;
28122   }
28123   if (StringICmp (id_str + 1, "unk100") == 0)
28124   {
28125     return TRUE;
28126   }
28127   else
28128   {
28129     /* make sure there are only numbers after the question mark */
28130     j = 1;
28131     while (isdigit (id_str [j]))
28132     {
28133       j++;
28134     }
28135     if (id_str [j] == 0)
28136     {
28137       return TRUE;
28138     }
28139     else
28140     {
28141       return FALSE;
28142     }
28143   }
28144 }
28145 
28146 
28147 //Not used for Autodef or cleanup
HasNoSeqID(SeqEntryPtr sep)28148 static Boolean HasNoSeqID (SeqEntryPtr sep)
28149 {
28150   BioseqPtr bsp;
28151 
28152   if (sep == NULL || ! IS_Bioseq (sep) || (bsp = sep->data.ptrvalue) == NULL)
28153   {
28154     return FALSE;
28155   }
28156   if (bsp->id == NULL)
28157   {
28158     return TRUE;
28159   }
28160   else
28161   {
28162     return FALSE;
28163   }
28164 }
28165 
28166 
28167 //Not used for Autodef or cleanup
FindLineForStartOfBadRead(FILE * fp,Int4 pos)28168 static Int4 FindLineForStartOfBadRead (FILE *fp, Int4 pos)
28169 {
28170   FileCache    fc;
28171   Int4         line_num = 0;
28172   Char         line [4096];
28173   CharPtr      str;
28174 
28175   if (fp == NULL || pos == 0) {
28176     return 0;
28177   }
28178 
28179   FileCacheSetup (&fc, fp);
28180   FileCacheSeek (&fc, 0);
28181   fseek (fp, 0, SEEK_SET);
28182   str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
28183   while (str != NULL && FileCacheTell (&fc) < pos) {
28184       line_num++;
28185       str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
28186   }
28187   return line_num;
28188 }
28189 
28190 
28191 //Not used for Autodef or cleanup
FindLineForBadReadChar(FILE * fp,Char badchar)28192 static Int4 FindLineForBadReadChar (FILE *fp, Char badchar)
28193 {
28194   FileCache    fc;
28195   Int4         line_num = 0;
28196   Char         line [4096];
28197   CharPtr      str;
28198 
28199   if (fp == NULL) {
28200     return 0;
28201   }
28202 
28203   FileCacheSetup (&fc, fp);
28204   FileCacheSeek (&fc, 0);
28205   fseek (fp, 0, SEEK_SET);
28206   str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
28207   while (str != NULL && StringChr (str, badchar) == NULL) {
28208       line_num++;
28209       str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
28210   }
28211   return line_num;
28212 }
28213 
28214 
28215 //Not used for Autodef or cleanup
28216 NLM_EXTERN SeqEntryPtr
ImportNucleotideFASTASequencesFromFileEx(FILE * fp,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped,Boolean allow_char_stripping,Nlm_ImportSeqCallbackProc callback,Pointer callback_data)28217 ImportNucleotideFASTASequencesFromFileEx
28218 (FILE           *fp,
28219  Boolean         parse_id,
28220  CharPtr         supplied_id_txt,
28221  ValNodePtr PNTR err_msg_list,
28222  BoolPtr         chars_stripped,
28223  Boolean         allow_char_stripping,
28224  Nlm_ImportSeqCallbackProc callback,
28225  Pointer                   callback_data)
28226 {
28227   Int4          seq_count = 0, nt_count = 0;
28228   SeqEntryPtr   last;
28229   Char          lastchar = '\0';
28230   SeqEntryPtr   nextsep;
28231   BioseqPtr     bsp = NULL;
28232   SeqEntryPtr   new_sep_list = NULL;
28233   Boolean       read_from_delta;
28234   Int4          pos, last_no_id_start = -1;
28235   Boolean       this_chars_stripped = FALSE;
28236   Boolean       isASN = FALSE;
28237   Int4          bad_start = 0, bad_line = 0;
28238 
28239   if (chars_stripped != NULL)
28240   {
28241     *chars_stripped = FALSE;
28242   }
28243 
28244   new_sep_list = NULL;
28245   last = NULL;
28246 
28247   pos = ftell (fp);
28248 
28249   while ((nextsep = ImportOneNucBioseq (fp, &this_chars_stripped, parse_id, supplied_id_txt, &lastchar, &read_from_delta, &isASN, err_msg_list)) != NULL)
28250   {
28251     if (new_sep_list == NULL) {
28252       new_sep_list = nextsep;
28253     }
28254     if (chars_stripped != NULL)
28255     {
28256       *chars_stripped |= this_chars_stripped;
28257     }
28258     if (last_no_id_start > -1)
28259     {
28260       if (HasGapID (nextsep))
28261       {
28262         nextsep = SeqEntryFree (nextsep);
28263         bsp = last->data.ptrvalue;
28264         SeqMgrDeleteFromBioseqIndex (bsp);
28265         bsp = BioseqFree (bsp);
28266         fseek (fp, last_no_id_start, SEEK_SET);
28267         bsp = ReadDeltaFastaWithEmptyDefline (fp, NULL, chars_stripped);
28268         last->data.ptrvalue = bsp;
28269         bsp->id = SeqIdFree (bsp->id);
28270         last_no_id_start = -1;
28271       }
28272       else if (HasNoSeqID (nextsep))
28273       {
28274         last_no_id_start = pos;
28275       }
28276       else
28277       {
28278         last_no_id_start = -1;
28279       }
28280     }
28281     else if (HasNoSeqID (nextsep))
28282     {
28283       last_no_id_start = pos;
28284     }
28285 
28286     /* add to list */
28287     if (nextsep != NULL)
28288     {
28289       if (last == NULL)
28290       {
28291         new_sep_list = nextsep;
28292         last = nextsep;
28293       }
28294       else
28295       {
28296         last->next = nextsep;
28297         last = nextsep;
28298       }
28299       seq_count++;
28300       if (IS_Bioseq (nextsep) && (bsp = (BioseqPtr) nextsep->data.ptrvalue) != NULL) {
28301         nt_count += bsp->length;
28302       }
28303       if (callback != NULL) {
28304         callback (seq_count, nt_count, callback_data);
28305       }
28306     }
28307     pos = ftell (fp);
28308     if (!allow_char_stripping && this_chars_stripped)
28309     {
28310       break;
28311     }
28312   }
28313 
28314   if ((!allow_char_stripping && this_chars_stripped) || (lastchar != (Char) EOF && lastchar != NULLB && lastchar != (Char) 255))
28315   {
28316     if (!this_chars_stripped && !isASN) {
28317       bad_start = FindLineForStartOfBadRead (fp, pos);
28318       bad_line = FindLineForBadReadChar (fp, lastchar);
28319       Message (MSG_ERROR, "Unable to read file, starting at line %d (found bad character '%c' at line %d)!", bad_start, lastchar, bad_line);
28320     }
28321     new_sep_list = SeqEntryFree (new_sep_list);
28322   }
28323   return new_sep_list;
28324 }
28325 
28326 //Not used for Autodef or cleanup
28327 NLM_EXTERN SeqEntryPtr
ImportNucleotideFASTASequencesFromFile(FILE * fp,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped,Boolean allow_char_stripping)28328 ImportNucleotideFASTASequencesFromFile
28329 (FILE           *fp,
28330  Boolean         parse_id,
28331  CharPtr         supplied_id_txt,
28332  ValNodePtr PNTR err_msg_list,
28333  BoolPtr         chars_stripped,
28334  Boolean         allow_char_stripping)
28335 {
28336   return ImportNucleotideFASTASequencesFromFileEx (fp, parse_id, supplied_id_txt,
28337                        err_msg_list, chars_stripped, allow_char_stripping, NULL, NULL);
28338 }
28339 
28340 
28341 //Not used for Autodef or cleanup
/* Walks the sibling chain starting at sep_list and, for every entry that is
 * a raw amino-acid Bioseq, deletes the final residue when that residue is
 * '*'.  Only the last residue is examined.  Sets, nucleotide sequences,
 * non-raw representations and empty sequences are left untouched.
 */
static void StripStopCodons (SeqEntryPtr sep_list)
{
  BioseqPtr pbsp;
  Char      prot_str[2];

  for (; sep_list != NULL; sep_list = sep_list->next) {
    if (!IS_Bioseq (sep_list)) {
      continue;
    }
    pbsp = (BioseqPtr) sep_list->data.ptrvalue;
    if (pbsp == NULL || !ISA_aa (pbsp->mol) || pbsp->repr != Seq_repr_raw) {
      continue;
    }
    /* an empty sequence has no last residue: length - 1 would be -1 */
    if (pbsp->length < 1 || pbsp->seq_data == NULL) {
      continue;
    }

    /* clear the buffer each time so a stream call that writes nothing
     * cannot leave a stale or uninitialized character behind */
    prot_str[0] = 0;
    prot_str[1] = 0;
    SeqPortStreamInt (pbsp, pbsp->length - 1, pbsp->length - 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (prot_str), NULL);
    if (prot_str[0] == '*') {
      /* trim trailing stop codon */
      BSSeek ((ByteStorePtr) pbsp->seq_data, -1, SEEK_END);
      BSDelete ((ByteStorePtr) pbsp->seq_data, 1);
      pbsp->length -= 1;
    }
  }
}
28362 
28363 
28364 //Not used for Autodef or cleanup
ImportProteinFASTASequences(FILE * fp,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped)28365 NLM_EXTERN SeqEntryPtr ImportProteinFASTASequences
28366 (FILE            *fp,
28367  Boolean         parse_id,
28368  CharPtr         supplied_id_txt,
28369  ValNodePtr PNTR err_msg_list,
28370  BoolPtr         chars_stripped)
28371 {
28372   Pointer       dataptr;
28373   Uint2         datatype;
28374   SeqEntryPtr   new_list = NULL, nextsep, lastsep = NULL;
28375   Boolean       error_reading = FALSE;
28376   Int4          pos, bad_start;
28377   BioseqPtr     bsp;
28378 
28379   pos = ftell (fp);
28380   dataptr = ReadAsnFastaOrFlatFileEx (fp, &datatype, NULL, FALSE, TRUE,
28381                                     FALSE, FALSE, chars_stripped);
28382   while (dataptr != NULL)
28383   {
28384     bsp = NULL;
28385     if (datatype == OBJ_SEQENTRY)
28386     {
28387       nextsep = (SeqEntryPtr) dataptr;
28388       if (IS_Bioseq(nextsep)) {
28389         bsp = nextsep->data.ptrvalue;
28390       } else {
28391         nextsep = NULL;
28392       }
28393     }
28394     else if (datatype == OBJ_BIOSEQ)
28395     {
28396       bsp = dataptr;
28397       nextsep = SeqMgrGetSeqEntryForData(bsp);
28398     }
28399     if (bsp != NULL)
28400     {
28401       if (parse_id)
28402       {
28403         ReplaceFakeIDWithIDFromTitle ((BioseqPtr) nextsep->data.ptrvalue);
28404       }
28405       else
28406       {
28407         if (!StringHasNoText (supplied_id_txt))
28408         {
28409           bsp->id = MakeSeqID (supplied_id_txt);
28410         }
28411       }
28412       SeqEntryPack (nextsep);
28413       if (lastsep == NULL) {
28414         new_list = nextsep;
28415       } else {
28416         lastsep->next = nextsep;
28417       }
28418       lastsep = nextsep;
28419       pos = ftell (fp);
28420       dataptr = ReadAsnFastaOrFlatFileEx (fp, &datatype, NULL, FALSE, TRUE,
28421                                           FALSE, FALSE, chars_stripped);
28422     }
28423     else
28424     {
28425       if (dataptr != NULL) {
28426         error_reading = TRUE;
28427       }
28428       dataptr = NULL;
28429     }
28430   }
28431   if (*chars_stripped || error_reading)
28432   {
28433     bad_start = FindLineForStartOfBadRead (fp, pos);
28434     Message (MSG_ERROR, "Unable to read file, starting at line %d!", bad_start);
28435     new_list = SeqEntryFree (new_list);
28436   }
28437 
28438   StripStopCodons (new_list);
28439 
28440   return new_list;
28441 
28442 }
28443 
28444 
28445 //Not used for Autodef or cleanup
AddUniqueUpdateSequenceIDs(SeqEntryPtr sep)28446 NLM_EXTERN void AddUniqueUpdateSequenceIDs (SeqEntryPtr sep)
28447 {
28448   BioseqPtr bsp;
28449   BioseqSetPtr bssp;
28450 
28451   if (sep == NULL)
28452   {
28453     return;
28454   }
28455   else if (IS_Bioseq (sep))
28456   {
28457     bsp = (BioseqPtr) sep->data.ptrvalue;
28458     if (bsp != NULL && bsp->id == NULL)
28459     {
28460       bsp->id = MakeUniqueSeqID ("UpdateSequence");
28461     }
28462   }
28463   else if (IS_Bioseq_set (sep))
28464   {
28465     /* we could add IDs to segmented sets, but maybe we should just remove them? */
28466     bssp = (BioseqSetPtr) sep->data.ptrvalue;
28467     if (bssp != NULL)
28468     {
28469       AddUniqueUpdateSequenceIDs (bssp->seq_set);
28470     }
28471   }
28472 
28473   AddUniqueUpdateSequenceIDs (sep->next);
28474 }
28475 
28476 //Not used for Autodef or cleanup
28477 NLM_EXTERN void
ListBioseqsInSeqEntry(SeqEntryPtr sep,Boolean is_na,Int4Ptr seq_num,ValNodePtr PNTR bioseq_list)28478 ListBioseqsInSeqEntry
28479 (SeqEntryPtr     sep,
28480  Boolean         is_na,
28481  Int4Ptr         seq_num,
28482  ValNodePtr PNTR bioseq_list)
28483 {
28484   BioseqPtr    bsp;
28485   BioseqSetPtr bssp;
28486 
28487   if (sep == NULL || bioseq_list == NULL || seq_num == NULL)
28488   {
28489     return;
28490   }
28491   if (IS_Bioseq (sep) && sep->data.ptrvalue != NULL)
28492   {
28493     bsp = (BioseqPtr) sep->data.ptrvalue;
28494     if (ISA_na (bsp->mol))
28495     {
28496       if (is_na)
28497       {
28498         ValNodeAddPointer (bioseq_list, *seq_num, bsp);
28499         (*seq_num)++;
28500       }
28501     }
28502     else if (!is_na)
28503     {
28504       ValNodeAddPointer (bioseq_list, *seq_num, bsp);
28505       (*seq_num)++;
28506     }
28507   }
28508   else if (IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL)
28509   {
28510     bssp = (BioseqSetPtr) sep->data.ptrvalue;
28511     ListBioseqsInSeqEntry (bssp->seq_set, is_na, seq_num, bioseq_list);
28512   }
28513 
28514   ListBioseqsInSeqEntry (sep->next, is_na, seq_num, bioseq_list);
28515 }
28516 
28517 
28518 //Not used for Autodef or cleanup
SeqIdListsOverlap(SeqIdPtr sip1,SeqIdPtr sip2)28519 static Boolean SeqIdListsOverlap (SeqIdPtr sip1, SeqIdPtr sip2)
28520 {
28521   SeqIdPtr sip_next;
28522   Char     tmp_id_str [255];
28523   Boolean  rval = FALSE;
28524   StringConstraint scd;
28525 
28526   MemSet (&scd, 0, sizeof (StringConstraint));
28527 
28528   while (sip1 != NULL && !rval)
28529   {
28530     if (SeqIdIn (sip1, sip2))
28531     {
28532       rval = TRUE;
28533     }
28534     else if (sip1->choice == SEQID_LOCAL)
28535     {
28536       sip_next = sip1->next;
28537       sip1->next = NULL;
28538       SeqIdWrite (sip1, tmp_id_str, PRINTID_REPORT, sizeof (tmp_id_str) - 1);
28539       sip1->next = sip_next;
28540       scd.match_text = tmp_id_str;
28541       scd.match_location = String_location_equals;
28542       rval = DoesSeqIDListMeetStringConstraint (sip2, &scd);
28543     }
28544     sip1 = sip1->next;
28545   }
28546   return rval;
28547 }
28548 
28549 
28550 //Not used for Autodef or cleanup
ShuffleUpdateBioseqList(ValNodePtr PNTR update_bioseq_list,ValNodePtr orig_bioseq_list)28551 NLM_EXTERN ValNodePtr ShuffleUpdateBioseqList (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list)
28552 {
28553   ValNodePtr unmatched_list = NULL;
28554   ValNodePtr orig_vnp, update_vnp;
28555   ValNodePtr new_update_list = NULL;
28556   Int4       bsp_pos = 0, update_pos, pos;
28557   BioseqPtr  orig_bsp, update_bsp;
28558 
28559   if (update_bioseq_list == NULL || *update_bioseq_list == NULL)
28560   {
28561     return NULL;
28562   }
28563   else if (orig_bioseq_list == NULL)
28564   {
28565     unmatched_list = *update_bioseq_list;
28566     *update_bioseq_list = NULL;
28567   }
28568 
28569   for (orig_vnp = orig_bioseq_list; orig_vnp != NULL; orig_vnp = orig_vnp->next)
28570   {
28571     if (orig_vnp->data.ptrvalue == NULL)
28572     {
28573       ValNodeAddPointer (&new_update_list, bsp_pos, NULL);
28574       bsp_pos ++;
28575       continue;
28576     }
28577     orig_bsp = (BioseqPtr) orig_vnp->data.ptrvalue;
28578     update_pos = -1;
28579     for (update_vnp = *update_bioseq_list, pos = 0;
28580          update_vnp != NULL && update_pos < 0;
28581          update_vnp = update_vnp->next, pos++)
28582     {
28583       if (update_vnp->data.ptrvalue != NULL)
28584       {
28585         update_bsp = (BioseqPtr) update_vnp->data.ptrvalue;
28586         if (SeqIdListsOverlap (update_bsp->id, orig_bsp->id))
28587         {
28588           update_pos = pos;
28589         }
28590       }
28591     }
28592     if (update_pos >= 0)
28593     {
28594       update_vnp = ExtractNthValNode (update_bioseq_list, update_pos);
28595       update_vnp->choice = bsp_pos;
28596       ValNodeLink (&new_update_list, update_vnp);
28597     }
28598     else
28599     {
28600       ValNodeAddPointer (&new_update_list, bsp_pos, NULL);
28601     }
28602     bsp_pos++;
28603   }
28604 
28605   unmatched_list = *update_bioseq_list;
28606   *update_bioseq_list = new_update_list;
28607 
28608   /* renumber unmatched_list */
28609   for (update_vnp = unmatched_list, update_pos = 0;
28610        update_vnp != NULL;
28611        update_vnp = update_vnp->next, update_pos++)
28612   {
28613     update_vnp->choice = update_pos;
28614   }
28615 
28616   return unmatched_list;
28617 }
28618 
28619 //Not used for Autodef or cleanup
28620 /* This function compares the text from a local ID against the
28621  * report string from non-local IDs in sip_list, useful when
28622  * comparing values from a file in which the user did not specify
28623  * the version or the gb| in the sequence ID.
28624  */
RelaxedSeqIdIn(SeqIdPtr sip,SeqIdPtr sip_list)28625 NLM_EXTERN Boolean RelaxedSeqIdIn (SeqIdPtr sip, SeqIdPtr sip_list)
28626 {
28627   SeqIdPtr sip_next;
28628   Char     id_txt1 [128], id_txt2 [128];
28629   CharPtr  ptr;
28630   Int4     len;
28631   DbtagPtr dp;
28632   Int4     bankit_num;
28633 
28634   if (sip == NULL || sip_list == NULL || sip->choice != SEQID_LOCAL)
28635   {
28636     return FALSE;
28637   }
28638 
28639   SeqIdWrite (sip, id_txt1, PRINTID_REPORT, sizeof (id_txt1) - 1);
28640 
28641   while (sip_list != NULL)
28642   {
28643     if (sip_list->choice != SEQID_LOCAL)
28644     {
28645       sip_next = sip_list->next;
28646       sip_list->next = NULL;
28647       SeqIdWrite (sip_list, id_txt2, PRINTID_REPORT, sizeof (id_txt1) - 1);
28648       sip_list->next = sip_next;
28649       if (StringCmp (id_txt1, id_txt2) == 0)
28650       {
28651         return TRUE;
28652       }
28653       ptr = StringChr (id_txt2, '.');
28654       if (ptr != NULL)  /* ID in list has version */
28655       {
28656         len = StringLen (id_txt1);
28657         if (len == ptr - id_txt2 && StringNCmp (id_txt1, id_txt2, len) == 0)
28658         {
28659           return TRUE;
28660         }
28661       }
28662       if (StringNICmp (id_txt1, "bankit", 6) == 0
28663           && sip_list->choice == SEQID_GENERAL
28664           && sip_list->data.ptrvalue != NULL) {
28665         bankit_num = atoi (id_txt1 + 6);
28666         dp = (DbtagPtr) sip_list->data.ptrvalue;
28667         if(StringICmp(dp->db, "BankIt") == 0 && dp->tag != NULL && dp->tag->id == bankit_num) {
28668           return TRUE;
28669         }
28670       }
28671     }
28672     sip_list = sip_list->next;
28673   }
28674   return FALSE;
28675 }
28676 
28677 
28678 //Not used for Autodef or cleanup
FindBioseqInList(ValNodePtr bioseq_list,SeqIdPtr sip,Int4Ptr position)28679 NLM_EXTERN BioseqPtr FindBioseqInList (ValNodePtr bioseq_list, SeqIdPtr sip, Int4Ptr position)
28680 {
28681   ValNodePtr vnp;
28682   BioseqPtr  bsp = NULL;
28683   Int4       vnp_pos;
28684 
28685   if (position != NULL)
28686   {
28687     *position = -1;
28688   }
28689   if (bioseq_list == NULL)
28690   {
28691     return NULL;
28692   }
28693 
28694   for (vnp = bioseq_list, vnp_pos = 0;
28695        vnp != NULL && bsp == NULL;
28696        vnp = vnp->next, vnp_pos++)
28697   {
28698     bsp = (BioseqPtr) vnp->data.ptrvalue;
28699     if (SeqIdIn (sip, bsp->id) || RelaxedSeqIdIn (sip, bsp->id))
28700     {
28701       if (position != NULL)
28702       {
28703         *position = vnp_pos;
28704       }
28705     }
28706     else
28707     {
28708       bsp = NULL;
28709     }
28710   }
28711   return bsp;
28712 }
28713 
28714 
28715 //Not used for Autodef or cleanup
28716 /* This function should find all update Bioseqs that have colliding sequence IDs and
28717  * replace the colliding IDs with new sequence IDs.
28718  */
ReplaceCollidingUpdateIDs(ValNodePtr update_bioseq_list,ValNodePtr orig_bioseq_list)28719 NLM_EXTERN void ReplaceCollidingUpdateIDs (ValNodePtr update_bioseq_list, ValNodePtr orig_bioseq_list)
28720 {
28721   ValNodePtr vnp, orig_vnp;
28722   SeqIdPtr   replace_sip, sip;
28723   BioseqPtr  bsp;
28724   Char       id_txt [128];
28725   Int4       orig_pos;
28726 
28727   if (update_bioseq_list == NULL || orig_bioseq_list == NULL)
28728   {
28729     return;
28730   }
28731 
28732   for (vnp = update_bioseq_list; vnp != NULL; vnp = vnp->next)
28733   {
28734     bsp = (BioseqPtr) vnp->data.ptrvalue;
28735     if (bsp != NULL)
28736     {
28737       if (FindBioseqInList (orig_bioseq_list, bsp->id, &orig_pos))
28738       {
28739         orig_vnp = GetNthValNode (orig_bioseq_list, orig_pos);
28740         if (orig_vnp != NULL && orig_vnp->data.ptrvalue != NULL)
28741         {
28742           replace_sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
28743           SeqIdWrite (replace_sip, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
28744           StringCat (id_txt, "_update");
28745           sip = MakeUniqueSeqID (id_txt);
28746           SeqMgrDeleteFromBioseqIndex (bsp);
28747           BioseqReplaceID (bsp, sip);
28748           sip = SeqIdFree (sip);
28749 
28750           SeqMgrReplaceInBioseqIndex (orig_vnp->data.ptrvalue);
28751           SeqMgrReplaceInBioseqIndex (bsp);
28752         }
28753       }
28754     }
28755   }
28756 
28757 }
28758 
28759 
28760 //Not used for Autodef or cleanup
RemoveSequencesWithoutUpdates(ValNodePtr PNTR orig_bioseq_list,ValNodePtr PNTR update_bioseq_list)28761 NLM_EXTERN void RemoveSequencesWithoutUpdates (ValNodePtr PNTR orig_bioseq_list, ValNodePtr PNTR update_bioseq_list)
28762 {
28763   ValNodePtr orig_prev = NULL, update_prev = NULL;
28764   ValNodePtr orig_next = NULL, update_next = NULL;
28765   ValNodePtr orig_vnp, update_vnp;
28766   Int4       seq_num;
28767 
28768   if (orig_bioseq_list == NULL || update_bioseq_list == NULL
28769       || *orig_bioseq_list == NULL || *update_bioseq_list == NULL)
28770   {
28771     return;
28772   }
28773 
28774   orig_vnp = *orig_bioseq_list;
28775   update_vnp = *update_bioseq_list;
28776 
28777   while (orig_vnp != NULL && update_vnp != NULL)
28778   {
28779     orig_next = orig_vnp->next;
28780     update_next = update_vnp->next;
28781     if (orig_vnp->data.ptrvalue == NULL || update_vnp->data.ptrvalue == NULL)
28782     {
28783       if (orig_prev == NULL || update_prev == NULL)
28784       {
28785         *orig_bioseq_list = orig_vnp->next;
28786         *update_bioseq_list = update_vnp->next;
28787       }
28788       else
28789       {
28790         orig_prev->next = orig_vnp->next;
28791         update_prev->next = update_vnp->next;
28792       }
28793       orig_vnp->next = NULL;
28794       update_vnp->next = NULL;
28795       ValNodeFree (orig_vnp);
28796       ValNodeFree (update_vnp);
28797     }
28798     else
28799     {
28800       orig_prev = orig_vnp;
28801       update_prev = update_vnp;
28802     }
28803 
28804     orig_vnp = orig_next;
28805     update_vnp = update_next;
28806   }
28807 
28808   for (orig_vnp = *orig_bioseq_list, update_vnp = *update_bioseq_list, seq_num = 0;
28809        orig_vnp != NULL && update_vnp != NULL;
28810        orig_vnp = orig_vnp->next, update_vnp = update_vnp->next, seq_num++)
28811   {
28812     orig_vnp->choice = seq_num;
28813     update_vnp->choice = seq_num;
28814   }
28815 }
28816 
28817 
28818 //Not used for Autodef or Cleanup
AlignForSequenceUpdate(BioseqPtr bsp1,BioseqPtr bsp2,BoolPtr revcomp,GlobalAlignFunc align_func)28819 NLM_EXTERN SeqAlignPtr AlignForSequenceUpdate (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp, GlobalAlignFunc align_func)
28820 {
28821   SeqIdPtr    old_id;
28822   SeqAlignPtr salp = NULL;
28823   Boolean     put_id_back = FALSE;
28824 
28825   if (bsp1 == NULL || bsp2 == NULL || align_func == NULL) {
28826     return NULL;
28827   }
28828 
28829   if (SeqIdIn (bsp2->id, bsp1->id)) {
28830     old_id = bsp2->id;
28831     bsp2->id = MakeUniqueSeqID ("UpdateSequence");
28832     SeqMgrReplaceInBioseqIndex (bsp2);
28833     put_id_back = TRUE;
28834   }
28835   salp = align_func (bsp1, bsp2, revcomp);
28836   if (put_id_back) {
28837     bsp2->id = SeqIdFree (bsp2->id);
28838     bsp2->id = old_id;
28839     SeqMgrReplaceInBioseqIndex (bsp2);
28840   }
28841   return salp;
28842 }
28843 
28844 
28845 //Not used for Autodef or Cleanup
AuthorFromEndnoteString(CharPtr val)28846 static AuthorPtr AuthorFromEndnoteString (CharPtr val)
28847 {
28848   AuthorPtr       auth;
28849   NameStdPtr      authname;
28850   Int4            len;
28851   CharPtr         comma;
28852 
28853   auth = AuthorNew ();
28854   auth->name = PersonIdNew ();
28855 
28856   comma = StringChr (val, ',');
28857   if (comma == NULL) {
28858     auth->name->choice = 5;
28859     auth->name->data = StringSave (val);
28860   } else {
28861     auth->name->choice = 2;
28862     authname = NameStdNew ();
28863     auth->name->data = authname;
28864     len = comma - val + 1;
28865     authname->names[0] = (CharPtr)MemNew (sizeof (Char) * len);
28866     StringNCpy (authname->names[0], val, len - 1);
28867     authname->names[0][len - 1] = 0;
28868     val = comma + 1;
28869     while (*val <= 256 && *val >= -1 && isspace (*val)) {
28870       val++;
28871     }
28872     comma = StringChr (val, ' ');
28873     if (comma) {
28874       /* have middle name/initials */
28875       len = comma - val + 1;
28876       authname->names[1] = (CharPtr)MemNew (sizeof (Char) * len);
28877       StringNCpy (authname->names[1], val, len - 1);
28878       authname->names[1][len - 1] = 0;
28879       val = comma + 1;
28880       while (isspace (*val)) {
28881         val++;
28882       }
28883       authname->names[4] = (CharPtr) MemNew (sizeof (Char) * (3 + StringLen (val)));
28884       sprintf (authname->names[4], "%c.%s", authname->names[1][0], val);
28885     } else {
28886       authname->names[1] = StringSave (val);
28887       authname->names[4] = (CharPtr) MemNew (sizeof (Char) * 3);
28888       sprintf (authname->names[4], "%c.", authname->names[1][0]);
28889     }
28890   }
28891   return auth;
28892 }
28893 
28894 
28895 //Not used for Autodef or Cleanup
ParsePubFromEndnote(FILE * fp)28896 NLM_EXTERN PubPtr ParsePubFromEndnote (FILE *fp)
28897 {
28898   ReadBufferData  rbd;
28899   CharPtr         line;
28900   PubPtr          pub;
28901   CharPtr         val, last, tmp;
28902   CitArtPtr       cit;
28903   CitJourPtr      jour;
28904   AuthorPtr       auth;
28905   Boolean         anything = FALSE;
28906 
28907   if (fp == NULL) {
28908     return NULL;
28909   }
28910   jour = CitJourNew ();
28911   cit = CitArtNew ();
28912   cit->from = 1;
28913   cit->fromptr = jour;
28914   pub = ValNodeNew (NULL);
28915   pub->choice = PUB_Article;
28916   pub->data.ptrvalue = cit;
28917   rbd.fp = fp;
28918   rbd.current_data = NULL;
28919   line = AbstractReadFunction (&rbd);
28920   while (line != NULL && line[0] != EOF) {
28921     val = StringChr (line, '-');
28922     if (val == NULL && line[0] == '%') {
28923       val = StringChr (line, ' ');
28924     }
28925     if (val != NULL) {
28926       val++;
28927       while (isspace (*val)) {
28928         val++;
28929       }
28930       last = val + StringLen (val) - 1;
28931       while (last > val && *last <= 256 && *last >= -1 && isspace (*last)) {
28932         last--;
28933       }
28934       if (last > val && isspace (*(last + 1))) {
28935         *(last + 1) = 0;
28936       }
28937       if (!StringHasNoText (val)) {
28938         if (StringNICmp (line, "TY", 2) == 0) {
28939           if (StringICmp (val, "JOUR") != 0) {
28940             pub = PubFree (pub);
28941             return NULL;
28942           }
28943         } else if (StringNICmp (line, "%0", 2) == 0) {
28944           if (StringICmp (val, "Journal Article") != 0) {
28945             pub = PubFree (pub);
28946             return NULL;
28947           }
28948         } else if (StringNICmp (line, "TI", 2) == 0
28949                 || StringNICmp (line, "T1", 2) == 0
28950                 || StringNICmp (line, "%T", 2) == 0) {
28951           ValNodeAddPointer (&(cit->title), 1, StringSave (val));
28952           anything = TRUE;
28953         } else if (StringNICmp (line, "JO", 2) == 0
28954                 || StringNICmp (line, "JF", 2) == 0
28955                 || StringNICmp (line, "%J", 2) == 0) {
28956           ValNodeAddPointer (&(jour->title), 1, StringSave (val));
28957           anything = TRUE;
28958         } else if (StringNICmp (line, "PB", 2) == 0) {
28959           if (jour->imp == NULL) {
28960             jour->imp = ImprintNew();
28961           }
28962           if (jour->imp->pub == NULL) {
28963             jour->imp->pub = AffilNew ();
28964           }
28965           jour->imp->pub->affil = StringSave (val);
28966         } else if (StringNICmp (line, "PY", 2) == 0
28967                 || StringNICmp (line, "Y1", 2) == 0
28968                 || StringNICmp (line, "%D", 2) == 0) {
28969           if (jour->imp == NULL) {
28970             jour->imp = ImprintNew();
28971           }
28972           if (jour->imp->date == NULL) {
28973             jour->imp->date = DateNew ();
28974           }
28975           jour->imp->date->data[0] = 1;
28976           jour->imp->date->data[1] = atoi (val) - 1900;
28977           anything = TRUE;
28978         } else if (StringNICmp (line, "IS", 2) == 0
28979                 || StringNICmp (line, "%N", 2) == 0) {
28980           if (jour->imp == NULL) {
28981             jour->imp = ImprintNew();
28982           }
28983           jour->imp->issue = StringSave (val);
28984         } else if (StringNICmp (line, "VL", 2) == 0
28985                 || StringNICmp (line, "%V", 2) == 0) {
28986           if (jour->imp == NULL) {
28987             jour->imp = ImprintNew();
28988           }
28989           jour->imp->volume = StringSave (val);
28990         } else if (StringNICmp (line, "SP", 2) == 0) {
28991           if (StringICmp (val, "no")) {
28992             /* ignore*/
28993           } else {
28994             if (jour->imp == NULL) {
28995               jour->imp = ImprintNew();
28996             }
28997             if (StringHasNoText (jour->imp->pages)) {
28998               jour->imp->pages = StringSave (val);
28999             } else {
29000               tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (val) + StringLen (jour->imp->pages) + 2));
29001               sprintf (tmp, "%s-%s", val, jour->imp->pages);
29002               jour->imp->pages = MemFree (jour->imp->pages);
29003               jour->imp->pages = tmp;
29004             }
29005           }
29006         } else if (StringNICmp (line, "EP", 2) == 0) {
29007           if (StringICmp (val, "no")) {
29008             /* ignore*/
29009           } else {
29010             if (jour->imp == NULL) {
29011               jour->imp = ImprintNew();
29012             }
29013             if (StringHasNoText (jour->imp->pages)) {
29014               jour->imp->pages = StringSave (val);
29015             } else {
29016               tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (val) + StringLen (jour->imp->pages) + 2));
29017               sprintf (tmp, "%s-%s", jour->imp->pages, val);
29018               jour->imp->pages = MemFree (jour->imp->pages);
29019               jour->imp->pages = tmp;
29020             }
29021           }
29022         } else if (StringNICmp (line, "%P", 2) == 0) {
29023           if (StringICmp (val, "no")) {
29024             /* ignore*/
29025           } else {
29026             if (jour->imp == NULL) {
29027               jour->imp = ImprintNew();
29028             }
29029             jour->imp->pages = MemFree (jour->imp->pages);
29030             jour->imp->pages = StringSave (val);
29031           }
29032         } else if (StringNICmp (line, "AU", 2) == 0
29033                 || StringNICmp (line, "A1", 2) == 0
29034                 || StringNICmp (line, "%A", 2) == 0) {
29035           if (cit->authors == NULL) {
29036             cit->authors = AuthListNew ();
29037             cit->authors->choice = 1;
29038           }
29039           auth = AuthorFromEndnoteString(val);
29040           if (auth != NULL) {
29041             ValNodeAddPointer (&(cit->authors->names), 0, auth);
29042             anything = TRUE;
29043           }
29044         }
29045       }
29046     }
29047 
29048     line = MemFree (line);
29049     line = AbstractReadFunction (&rbd);
29050   }
29051   if (!anything) {
29052     pub = PubFree (pub);
29053   }
29054   return pub;
29055 }
29056 
29057 //Not used by Autodef or Cleanup
ReplaceStopsWithSelenocysteine(BioseqPtr bsp,FILE * log_fp)29058 static Int4 ReplaceStopsWithSelenocysteine(BioseqPtr bsp, FILE *log_fp)
29059 {
29060   SeqFeatPtr prot, cds;
29061   SeqMgrFeatContext context;
29062   ProtRefPtr prp;
29063   CharPtr    bases, cp;
29064   Int4       pos;
29065   SeqLocPtr  prot_loc, dna_loc;
29066   Boolean    partial5, partial3;
29067   BioseqPtr  nbsp;
29068   Char       nbases[10];
29069   CdRegionPtr crp;
29070   CodeBreakPtr cbp, last_cbp = NULL;
29071   CharPtr      fmt = "Unable to add transl_except for stop codon at position %d in protein %s because codon is not TGA\n";
29072   Char         id_buf[PATH_MAX];
29073   Int4         num_replaced = 0;
29074 
29075   if (bsp == NULL || !ISA_aa(bsp->mol)) {
29076     return num_replaced;
29077   }
29078   prot = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_PROT, &context);
29079   if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL
29080       || prp->name == NULL
29081       || StringISearch (prp->name->data.ptrvalue, "seleno") == NULL) {
29082     return num_replaced;
29083   }
29084   cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
29085   if (cds == NULL) {
29086     return num_replaced;
29087   }
29088   nbsp = BioseqFindFromSeqLoc (cds->location);
29089   if (nbsp == NULL) {
29090     return num_replaced;
29091   }
29092   crp = (CdRegionPtr) cds->data.value.ptrvalue;
29093   if (crp == NULL) {
29094     crp = CdRegionNew ();
29095     cds->data.value.ptrvalue = crp;
29096   }
29097 
29098   CheckSeqLocForPartial (prot->location, &partial5, &partial3);
29099   /* find stop codons */
29100   bases = GetSequenceByBsp(bsp);
29101   cp = StringChr (bases, '*');
29102   while (cp != NULL) {
29103     pos = cp - bases;
29104     prot_loc = SeqLocIntNew (pos, pos, Seq_strand_unknown, SeqIdFindBest (bsp->id, 0));
29105     dna_loc = productInterval_to_locationIntervals(cds, pos, pos, partial5);
29106     SeqPortStreamLoc (dna_loc, STREAM_EXPAND_GAPS, (Pointer) nbases, NULL);
29107     if (StringICmp (nbases, "TGA") == 0) {
29108       cbp = CodeBreakNew ();
29109       cbp->loc = dna_loc;
29110       cbp->aa.choice = 1; /* ncbieaa */
29111       cbp->aa.value.intvalue = 'U';
29112       if (last_cbp == NULL) {
29113         crp->code_break = cbp;
29114       } else {
29115         last_cbp->next = cbp;
29116       }
29117       last_cbp = cbp;
29118       num_replaced ++;
29119     } else {
29120       if (log_fp != NULL) {
29121         SeqIdWrite (SeqIdFindBest (bsp->id, 0), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
29122         fprintf (log_fp, fmt, pos + 1, id_buf);
29123       }
29124       dna_loc = SeqLocFree (dna_loc);
29125     }
29126     prot_loc = SeqLocFree (prot_loc);
29127     cp = StringChr (cp + 1, '*');
29128   }
29129 
29130   if (num_replaced > 0) {
29131     RetranslateOneCDS (cds, cds->idx.entityID, TRUE, TRUE);
29132   }
29133 
29134   return num_replaced;
29135 }
29136 
29137 
/* State shared with the per-Bioseq fix-up callbacks: a running count of the
 * changes made and the stream that log lines are written to.
 */
typedef struct fixlog {
  Int4 num_replaced;  /* total number of changes; callbacks add to it */
  FILE *log_fp;       /* log stream handed on to the worker; may be NULL */
} FixLogData, PNTR FixLogPtr;
29142 
29143 //Not used by Autodef or Cleanup
/* Per-Bioseq callback: runs ReplaceStopsWithSelenocysteine on bsp with the
 * log stream held in data (a FixLogPtr) and adds the result to the running
 * total kept there.  Does nothing when data is NULL.
 */
static void ReplaceStopsWithSelenocysteineCallback (BioseqPtr bsp, Pointer data)
{
  FixLogPtr tally = (FixLogPtr) data;
  Int4      fixed_here;

  if (tally != NULL) {
    fixed_here = ReplaceStopsWithSelenocysteine (bsp, tally->log_fp);
    tally->num_replaced += fixed_here;
  }
}
29153 
29154 //Not used by Autodef or Cleanup
ReplaceStopsWithSelenocysteineInSeqEntry(SeqEntryPtr sep,FILE * log_fp)29155 NLM_EXTERN Boolean ReplaceStopsWithSelenocysteineInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
29156 {
29157   FixLogData rd;
29158 
29159   MemSet (&rd, 0, sizeof (FixLogData));
29160   rd.log_fp = log_fp;
29161 
29162   VisitBioseqsInSep (sep, &rd, ReplaceStopsWithSelenocysteineCallback);
29163   if (rd.num_replaced > 0) {
29164     if (log_fp != NULL) {
29165       fprintf (log_fp, "Replaced %d stops with selenocysteine\n", rd.num_replaced);
29166     }
29167     return TRUE;
29168   } else {
29169     return FALSE;
29170   }
29171 }
29172 
29173 
/* One tRNA feature together with the values used to sort and compare it.
 * Filled in by trnaMatchNew and released by trnaMatchFree.
 */
typedef struct trnamatch {
  CharPtr label;      /* private copy of the feature context label */
  Int4    left;       /* left value from the feature context */
  Int4    len;        /* SeqLocLen of the feature location */
  Uint1   strand;     /* strand from the feature context */
  SeqFeatPtr sfp;     /* the feature itself; not freed by trnaMatchFree */
  SeqFeatPtr gene;    /* result of GetGeneForFeature for sfp; not freed by trnaMatchFree */
} trnaMatchData, PNTR trnaMatchPtr;
29182 
29183 //Not used by Autodef or Cleanup
trnaMatchNew(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)29184 trnaMatchPtr trnaMatchNew (SeqFeatPtr sfp, SeqMgrFeatContextPtr context)
29185 {
29186   trnaMatchPtr t = (trnaMatchPtr) MemNew (sizeof (trnaMatchData));
29187   t->label = StringSave(context->label);
29188   t->left = context->left;
29189   t->len = SeqLocLen (sfp->location);
29190   t->strand = context->strand;
29191   t->sfp = sfp;
29192   t->gene = GetGeneForFeature (t->sfp);
29193   return t;
29194 }
29195 
29196 
29197 //Not used by Autodef or Cleanup
/* Releases t and its label copy.  The features that t points to are left
 * alone.  A NULL argument is returned unchanged; otherwise the value
 * returned is whatever MemFree returns for t.
 */
trnaMatchPtr trnaMatchFree (trnaMatchPtr t)
{
  if (t == NULL) {
    return t;
  }
  t->label = MemFree (t->label);
  return MemFree (t);
}
29206 
29207 
29208 //Not used by Autodef or Cleanup
SortVnpBytrnaMatch(VoidPtr ptr1,VoidPtr ptr2)29209 static int LIBCALLBACK SortVnpBytrnaMatch (VoidPtr ptr1, VoidPtr ptr2)
29210 
29211 {
29212   trnaMatchPtr     str1;
29213   trnaMatchPtr     str2;
29214   ValNodePtr  vnp1;
29215   ValNodePtr  vnp2;
29216   int rval = 0;
29217 
29218   if (ptr1 != NULL && ptr2 != NULL) {
29219     vnp1 = *((ValNodePtr PNTR) ptr1);
29220     vnp2 = *((ValNodePtr PNTR) ptr2);
29221     if (vnp1 != NULL && vnp2 != NULL) {
29222       str1 = (trnaMatchPtr) vnp1->data.ptrvalue;
29223       str2 = (trnaMatchPtr) vnp2->data.ptrvalue;
29224       if (str1 != NULL && str2 != NULL) {
29225         rval = StringICmp (str1->label, str2->label);
29226         if (rval == 0) {
29227           if (str1->strand == Seq_strand_minus && str2->strand != Seq_strand_minus) {
29228             rval = 1;
29229           } else if (str1->strand != Seq_strand_minus && str2->strand == Seq_strand_minus) {
29230             rval = -1;
29231           }
29232         }
29233         if (rval == 0) {
29234           if (str1->strand == Seq_strand_minus) {
29235             if (str1->left > str2->left) {
29236               rval = -1;
29237             } else if (str1->left < str2->left) {
29238               rval = 1;
29239             }
29240           } else {
29241             if (str1->left > str2->left) {
29242               rval = 1;
29243             } else if (str1->left < str2->left) {
29244               rval = -1;
29245             }
29246           }
29247         }
29248       }
29249     }
29250   }
29251   return rval;
29252 }
29253 
29254 
29255 //Not used by Autodef or Cleanup
/* Replaces *loc with the result of SeqLocMerge (bsp, *loc, add, ...) and
 * frees the previous *loc.  single_interval is passed straight through to
 * SeqLocMerge.  Nothing happens when loc, *loc or add is NULL.
 */
static void AddToLoc (SeqLocPtr PNTR loc, SeqLocPtr add, Boolean single_interval, BioseqPtr bsp)
{
  SeqLocPtr merged;

  if (loc != NULL && *loc != NULL && add != NULL) {
    merged = SeqLocMerge (bsp, *loc, add, single_interval, FALSE, FALSE);
    SeqLocFree (*loc);
    *loc = merged;
  }
}
29267 
29268 
29269 //Not used by Autodef or Cleanup
JoinShortTrnasCallback(BioseqPtr bsp,Pointer data)29270 static void JoinShortTrnasCallback(BioseqPtr bsp, Pointer data)
29271 {
29272   FixLogPtr rp;
29273   SeqFeatPtr sfp;
29274   SeqMgrFeatContext context;
29275   ValNodePtr list = NULL, vnp;
29276   trnaMatchPtr t_prev, t_this;
29277 
29278   if (bsp == NULL) {
29279     return;
29280   }
29281   rp = (FixLogPtr) data;
29282 
29283   for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_tRNA, &context);
29284        sfp != NULL;
29285        sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_tRNA, &context)) {
29286     if (SeqLocLen (sfp->location) < 50) {
29287       ValNodeAddPointer (&list, 0, trnaMatchNew(sfp, &context));
29288     }
29289   }
29290 
29291   if (list != NULL && list->next != NULL) {
29292     list = ValNodeSort (list, SortVnpBytrnaMatch);
29293     t_prev = list->data.ptrvalue;
29294     vnp = list->next;
29295     while (vnp != NULL) {
29296       t_this = vnp->data.ptrvalue;
29297       if (StringICmp (t_prev->label, t_this->label) == 0
29298           && ((t_prev->strand == Seq_strand_minus && t_this->strand == Seq_strand_minus)
29299               || (t_prev->strand != Seq_strand_minus && t_this->strand != Seq_strand_minus))) {
29300         AddToLoc (&(t_prev->sfp->location), t_this->sfp->location, FALSE, bsp);
29301         if (t_prev->gene != NULL) {
29302           if (t_this->gene != NULL) {
29303             AddToLoc (&(t_prev->gene->location), t_this->gene->location, TRUE, bsp);
29304           } else {
29305             AddToLoc (&(t_prev->gene->location), t_this->sfp->location, TRUE, bsp);
29306           }
29307         }
29308         if (t_this->gene != NULL) {
29309           t_this->gene->idx.deleteme = TRUE;
29310         }
29311 
29312         SetStringValue (&(t_prev->sfp->comment), t_this->sfp->comment, ExistingTextOption_append_semi);
29313         t_this->sfp->idx.deleteme = TRUE;
29314         rp->num_replaced ++;
29315         vnp = vnp->next;
29316       } else {
29317         t_prev = t_this;
29318         while (t_prev != NULL && t_prev->sfp->idx.deleteme) {
29319           vnp = vnp->next;
29320           if (vnp == NULL) {
29321             t_prev = NULL;
29322           } else {
29323             t_prev = vnp->data.ptrvalue;
29324             vnp = vnp->next;
29325           }
29326         }
29327         if (vnp != NULL) {
29328           vnp = vnp->next;
29329         }
29330       }
29331     }
29332   }
29333 
29334   for (vnp = list; vnp != NULL; vnp = vnp->next) {
29335     vnp->data.ptrvalue = trnaMatchFree (vnp->data.ptrvalue);
29336   }
29337   list = ValNodeFree (list);
29338 }
29339 
29340 //Not used by Autodef or Cleanup
JoinShortTrnas(SeqEntryPtr sep,FILE * log_fp)29341 NLM_EXTERN Boolean JoinShortTrnas (SeqEntryPtr sep, FILE *log_fp)
29342 {
29343   FixLogData rd;
29344 
29345   MemSet (&rd, 0, sizeof (FixLogData));
29346   rd.log_fp = log_fp;
29347 
29348   VisitBioseqsInSep (sep, &rd, JoinShortTrnasCallback);
29349   DeleteMarkedObjects (ObjMgrGetEntityIDForChoice (sep), 0, NULL);
29350 
29351   if (rd.num_replaced > 0) {
29352     if (log_fp != NULL) {
29353       fprintf (log_fp, "Joined %d short tRNAs\n", rd.num_replaced);
29354     }
29355     return TRUE;
29356   } else {
29357     return FALSE;
29358   }
29359 }
29360 
29361 
29362 //Not part of Autodef or Cleanup
IsRegulatorySubtype(Uint1 key)29363 NLM_EXTERN Boolean IsRegulatorySubtype (Uint1 key)
29364 {
29365     if (key == FEATDEF_enhancer ||
29366         key == FEATDEF_promoter ||
29367         key == FEATDEF_CAAT_signal ||
29368         key == FEATDEF_TATA_signal ||
29369         key == FEATDEF_35_signal ||
29370         key == FEATDEF_10_signal ||
29371         key == FEATDEF_RBS ||
29372         key == FEATDEF_GC_signal ||
29373         key == FEATDEF_polyA_signal ||
29374         key == FEATDEF_attenuator ||
29375         key == FEATDEF_terminator ||
29376         key == FEATDEF_misc_signal) {
29377         return TRUE;
29378     } else {
29379         return FALSE;
29380     }
29381 }
29382 //LCOV_EXCL_STOP
29383 
29384