1 /*   sqnutil1.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  sqnutil1.c
27 *
28 * Author:  Jonathan Kans
29 *
30 * Version Creation Date:   9/2/97
31 *
32 * $Revision: 6.913 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date     Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 
45 #include <sqnutils.h>
46 #include <gather.h>
47 #include <subutil.h>
48 #include <objfdef.h>
49 #include <seqport.h>
50 #include <objproj.h>
51 /* #include <objmmdb1.h> */
52 #include <gbfeat.h>
53 #include <gbftdef.h>
54 #include <edutil.h>
55 #include <tofasta.h>
56 #include <parsegb.h>
57 #include <utilpars.h>
58 #include <validatr.h>
59 #include <explore.h>
60 #include <subutil.h>
61 #include <asn2gnbi.h>
62 #include <salpacc.h>
63 #include <alignmgr2.h>
64 #include <valid.h>
65 #include <objvalid.h>
66 #include <valapi.h>
67 #include <findrepl.h>
68 
69 
70 #define NLM_GENERATED_CODE_PROTO
71 #include <objmacro.h>
72 #include <macroapi.h>
73 
74 static int descr_insert_order [] = {
75   Seq_descr_title,
76   Seq_descr_source,
77   Seq_descr_molinfo,
78   Seq_descr_het,
79   Seq_descr_pub,
80   Seq_descr_comment,
81   Seq_descr_name,
82   Seq_descr_user,
83   Seq_descr_maploc,
84   Seq_descr_region,
85   Seq_descr_num,
86   Seq_descr_dbxref,
87   Seq_descr_mol_type,
88   Seq_descr_modif,
89   Seq_descr_method,
90   Seq_descr_org,
91   Seq_descr_sp,
92   Seq_descr_pir,
93   Seq_descr_prf,
94   Seq_descr_pdb,
95   Seq_descr_embl,
96   Seq_descr_genbank,
97   Seq_descr_modelev,
98   Seq_descr_create_date,
99   Seq_descr_update_date,
100   0
101 };
102 
/*
 * SeqEntryExplore callback that puts the descriptor chain of one Bioseq or
 * BioseqSet into the order given by descr_insert_order.
 *
 * Each descriptor is unlinked and appended to a bucket selected by its
 * choice, so descriptors sharing a choice keep their relative order.
 * Buckets are then re-linked in table order.  Any bucket the table does not
 * mention -- in particular the overflow bucket that collects out-of-range
 * choices -- is appended afterwards, so no descriptor is dropped from the
 * chain (previously such descriptors were unlinked and never re-attached).
 *
 * sep                 - entry to normalize; NULL is ignored
 * data, index, indent - unused, present to match the callback signature
 */
static void NormalizeDescriptorProc (
  SeqEntryPtr sep,
  Pointer data,
  Int4 index,
  Int2 indent
)

{
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  /* arrays are SEQDESCR_MAX + 1, last slot stores unexpected descriptor numbers */
  SeqDescrPtr       first [SEQDESCR_MAX + 1];
  SeqDescrPtr       last [SEQDESCR_MAX + 1];
  int               i;
  int               idx;
  SeqDescrPtr PNTR  head = NULL;
  SeqDescrPtr PNTR  tail;
  SeqDescrPtr       next;
  SeqDescrPtr       sdp;

  if (sep == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    head = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    head = &(bssp->descr);
  }
  if (head == NULL) return;

  MemSet ((Pointer) first, 0, sizeof (first));
  MemSet ((Pointer) last, 0, sizeof (last));

  /* detach the whole chain and distribute it into per-choice buckets */
  sdp = *head;
  *head = NULL;
  while (sdp != NULL) {
    next = sdp->next;
    sdp->next = NULL;

    idx = (int) sdp->choice;
    /* unexpected descriptor numbers go into last slot */
    if (idx <= 0 || idx >= SEQDESCR_MAX) {
      idx = SEQDESCR_MAX;
    }
    if (last [idx] != NULL) {
      (last [idx])->next = sdp;
    } else {
      first [idx] = sdp;
    }
    last [idx] = sdp;

    sdp = next;
  }

  /* re-link the buckets in the preferred order, tracking the chain tail */
  tail = head;
  for (i = 0; descr_insert_order [i] != 0; i++) {
    idx = descr_insert_order [i];
    /* never index outside the bucket arrays, whatever the table holds */
    if (idx < 0 || idx > SEQDESCR_MAX) continue;
    if (first [idx] == NULL) continue;
    *tail = first [idx];
    tail = &((last [idx])->next);
    /* mark as consumed so the bucket cannot be linked a second time */
    first [idx] = NULL;
  }

  /* append every bucket the table did not cover, including the overflow slot */
  for (idx = 1; idx <= SEQDESCR_MAX; idx++) {
    if (first [idx] == NULL) continue;
    *tail = first [idx];
    tail = &((last [idx])->next);
    first [idx] = NULL;
  }
}
172 
NormalizeDescriptorOrder(SeqEntryPtr sep)173 NLM_EXTERN void NormalizeDescriptorOrder (
174   SeqEntryPtr sep
175 )
176 
177 {
178   SeqEntryExplore (sep, NULL, NormalizeDescriptorProc);
179 }
180 
181 typedef struct orgscan {
182   ObjMgrPtr     omp;
183   Int2          nuclCode;
184   Int2          mitoCode;
185   Int2          pstdCode;
186   Boolean       mito;
187   Boolean       plastid;
188   Char          taxname [196];
189   BioSourcePtr  biop;
190 } OrgScan, PNTR OrgScanPtr;
191 
OrgScanGatherFunc(GatherContextPtr gcp)192 static Boolean OrgScanGatherFunc (GatherContextPtr gcp)
193 
194 {
195   BioSourcePtr   biop;
196   Boolean        doCodes = FALSE;
197   Boolean        doMito = FALSE;
198   Boolean        doTaxname = FALSE;
199   Boolean        mito = FALSE;
200   Int2           mitoCode = 0;
201   Int2           nuclCode = 0;
202   Int2           pstdCode = 0;
203   ObjMgrTypePtr  omtp;
204   OrgNamePtr     onp;
205   OrgRefPtr      orp;
206   OrgScanPtr     osp;
207   ValNodePtr     sdp;
208   SeqFeatPtr     sfp;
209   Uint2          subtype;
210   CharPtr        taxname = NULL;
211   Int2           val;
212   ValNodePtr     vnp;
213 
214   if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
215   if (gcp->thistype != OBJ_SEQFEAT  && gcp->thistype != OBJ_SEQDESC) return TRUE;
216 
217   osp = (OrgScanPtr) gcp->userdata;
218   if (osp == NULL) return TRUE;
219 
220   subtype = 0;
221   if (gcp->thistype == OBJ_SEQFEAT  || gcp->thistype == OBJ_SEQDESC) {
222     omtp = ObjMgrTypeFind (osp->omp, gcp->thistype, NULL, NULL);
223     if (omtp == NULL) {
224       return TRUE;
225     }
226     if (omtp->subtypefunc != NULL) {
227       subtype = (*(omtp->subtypefunc)) (gcp->thisitem);
228     }
229   }
230 
231   orp = NULL;
232   biop = NULL;
233   switch (gcp->thistype) {
234     case OBJ_SEQFEAT :
235       sfp = (SeqFeatPtr) gcp->thisitem;
236       switch (subtype) {
237         case FEATDEF_ORG :
238           //LCOV_EXCL_START
239           //org features are converted to biosrc features in BasicCleanup
240           orp = (OrgRefPtr) sfp->data.value.ptrvalue;
241           break;
242           //LCOV_EXCL_STOP
243         case FEATDEF_BIOSRC :
244           biop = (BioSourcePtr) sfp->data.value.ptrvalue;
245           break;
246         default :
247           break;
248       }
249       break;
250     case OBJ_SEQDESC :
251       sdp = (ValNodePtr) gcp->thisitem;
252       switch (subtype) {
253         case Seq_descr_modif :
254           vnp = (ValNodePtr) sdp->data.ptrvalue;
255           while (vnp != NULL) {
256             val = (Int2) vnp->data.intvalue;
257             if (val == MODIF_mitochondrial || val == MODIF_kinetoplast) {
258               mito = TRUE;
259               doMito = TRUE;
260               /* osp->mito = TRUE; */
261             }
262             vnp = vnp->next;
263           }
264           break;
265         case Seq_descr_org :
266           //LCOV_EXCL_START
267           // org descriptors are converted to biosrc descriptors in basiccleanup
268           orp = (OrgRefPtr) sdp->data.ptrvalue;
269           break;
270           //LCOV_EXCL_STOP
271         case Seq_descr_source :
272           biop = (BioSourcePtr) sdp->data.ptrvalue;
273           break;
274         default :
275           break;
276       }
277       break;
278     default :
279       break;
280   }
281 
282   if (orp == NULL && biop != NULL) {
283     orp = biop->org;
284     mito = (Boolean) (biop->genome == GENOME_kinetoplast ||
285                       biop->genome == GENOME_mitochondrion ||
286                       biop->genome == GENOME_hydrogenosome);
287     doMito = TRUE;
288     /* osp->mito = (Boolean) (biop->genome == 4 || biop->genome == 5); */
289   }
290   if (orp != NULL) {
291     taxname = orp->taxname;
292     doTaxname = TRUE;
293     /* StringNCpy_0 (osp->taxname, orp->taxname, sizeof (osp->taxname)); */
294     onp = orp->orgname;
295     if (onp != NULL) {
296       nuclCode = onp->gcode;
297       mitoCode = onp->mgcode;
298       pstdCode = onp->pgcode;
299       doCodes = TRUE;
300       /* osp->nuclCode = onp->gcode;
301       osp->mitoCode = onp->mgcode; */
302     }
303   }
304   if (biop != NULL) {
305     if (osp->biop == NULL || biop->is_focus) {
306       osp->biop = biop;
307       if (doMito) {
308         osp->mito = mito;
309       }
310       osp->plastid = (Boolean) (biop->genome == GENOME_chloroplast ||
311                                 biop->genome == GENOME_chromoplast ||
312                                 biop->genome == GENOME_plastid ||
313                                 biop->genome == GENOME_cyanelle ||
314                                 biop->genome == GENOME_apicoplast ||
315                                 biop->genome == GENOME_leucoplast ||
316                                 biop->genome == GENOME_proplastid ||
317                                 biop->genome == GENOME_chromatophore);
318       if (doCodes) {
319         osp->nuclCode = nuclCode;
320         osp->mitoCode = mitoCode;
321         osp->pstdCode = pstdCode;
322       }
323       if (doTaxname) {
324         StringNCpy_0 (osp->taxname, taxname, sizeof (osp->taxname));
325       }
326     }
327   }
328 
329   return TRUE;
330 }
331 
332 //LCOV_EXCL_START
SeqEntryOrEntityIDToGeneticCode(SeqEntryPtr sep,Uint2 entityID,BoolPtr mito,CharPtr taxname,size_t maxsize,BioSourcePtr PNTR biopp)333 static Int2 SeqEntryOrEntityIDToGeneticCode (SeqEntryPtr sep, Uint2 entityID, BoolPtr mito,
334                                              CharPtr taxname, size_t maxsize,
335                                              BioSourcePtr PNTR biopp)
336 
337 {
338   GatherScope  gs;
339   OrgScan      osp;
340 
341   if (mito != NULL) {
342     *mito = FALSE;
343   }
344   if (taxname != NULL && maxsize > 0) {
345     *taxname = '\0';
346   }
347   osp.mito = FALSE;
348   osp.plastid = FALSE;
349   osp.nuclCode = 0;
350   osp.mitoCode = 0;
351   osp.pstdCode = 0;
352   osp.omp = ObjMgrGet ();
353   osp.taxname [0] = '\0';
354   osp.biop = NULL;
355   MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
356   gs.seglevels = 1;
357   gs.get_feats_location = TRUE;
358   MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
359   gs.ignore[OBJ_BIOSEQ] = FALSE;
360   gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
361   gs.ignore[OBJ_SEQFEAT] = FALSE;
362   gs.ignore[OBJ_SEQANNOT] = FALSE;
363   gs.ignore[OBJ_SEQDESC] = FALSE;
364   if (sep != NULL) {
365     gs.scope = sep;
366     GatherSeqEntry (sep, (Pointer) &osp, OrgScanGatherFunc, &gs);
367   } else if (entityID > 0) {
368     GatherEntity (entityID, (Pointer) &osp, OrgScanGatherFunc, &gs);
369   }
370   if (mito != NULL) {
371     *mito = osp.mito;
372   }
373   if (taxname != NULL && maxsize > 0) {
374     StringNCpy_0 (taxname, osp.taxname, maxsize);
375   }
376   if (biopp != NULL) {
377     *biopp = osp.biop;
378   }
379   if (osp.plastid) {
380     if (osp.pstdCode > 0) {
381       return osp.pstdCode;
382     } else {
383       return 11;
384     }
385   } else if (osp.mito) {
386     return osp.mitoCode;
387   } else {
388     return osp.nuclCode;
389   }
390 }
391 
EntityIDToGeneticCode(Uint2 entityID,BoolPtr mito,CharPtr taxname,size_t maxsize)392 NLM_EXTERN Int2 EntityIDToGeneticCode (Uint2 entityID, BoolPtr mito, CharPtr taxname, size_t maxsize)
393 
394 {
395   return SeqEntryOrEntityIDToGeneticCode (NULL, entityID, mito, taxname, maxsize, NULL);
396 }
397 
SeqEntryToGeneticCode(SeqEntryPtr sep,BoolPtr mito,CharPtr taxname,size_t maxsize)398 NLM_EXTERN Int2 SeqEntryToGeneticCode (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize)
399 
400 {
401   return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, NULL);
402 }
403 
SeqEntryToBioSource(SeqEntryPtr sep,BoolPtr mito,CharPtr taxname,size_t maxsize,BioSourcePtr PNTR biopp)404 NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize, BioSourcePtr PNTR biopp)
405 
406 {
407   return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, biopp);
408 }
409 
410 
BioseqToGeneticCode(BioseqPtr bsp,Int2Ptr gencodep,BoolPtr mitop,BoolPtr plastidp,CharPtr taxnamep,size_t maxsize,BioSourcePtr PNTR biopp)411 NLM_EXTERN Boolean BioseqToGeneticCode (
412   BioseqPtr bsp,
413   Int2Ptr gencodep,
414   BoolPtr mitop,
415   BoolPtr plastidp,
416   CharPtr taxnamep,
417   size_t maxsize,
418   BioSourcePtr PNTR biopp
419 )
420 
421 {
422   BioSourcePtr       biop = NULL;
423   SeqMgrDescContext  dcontext;
424   SeqMgrFeatContext  fcontext;
425   Int2               gencode = 0;
426   Boolean            mito = FALSE;
427   Int2               mitoCode = 0;
428   Int2               nuclCode = 0;
429   Int2               pstdCode = 0;
430   OrgNamePtr         onp;
431   OrgRefPtr          orp;
432   Boolean            plastid = FALSE;
433   SeqDescrPtr        sdp;
434   SeqFeatPtr         sfp;
435   CharPtr            taxname = NULL;
436 
437   if (bsp == NULL) return FALSE;
438 
439   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
440   if (sdp != NULL) {
441     biop = (BioSourcePtr) sdp->data.ptrvalue;
442   }
443 
444   if (biop == NULL) {
445     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
446     if (sfp != NULL) {
447       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
448     }
449   }
450 
451   if (biop == NULL) return FALSE;
452   orp = biop->org;
453   if (orp == NULL) return FALSE;
454 
455   taxname = orp->taxname;
456   if (StringHasNoText (taxname)) return FALSE;
457 
458   onp = orp->orgname;
459   if (onp != NULL) {
460     nuclCode = onp->gcode;
461     mitoCode = onp->mgcode;
462     pstdCode = onp->pgcode;
463   }
464 
465   mito = (Boolean) (biop->genome == GENOME_kinetoplast ||
466                     biop->genome == GENOME_mitochondrion ||
467                     biop->genome == GENOME_hydrogenosome);
468 
469   plastid = (Boolean) (biop->genome == GENOME_chloroplast ||
470                        biop->genome == GENOME_chromoplast ||
471                        biop->genome == GENOME_plastid ||
472                        biop->genome == GENOME_cyanelle ||
473                        biop->genome == GENOME_apicoplast ||
474                        biop->genome == GENOME_leucoplast ||
475                        biop->genome == GENOME_proplastid ||
476                        biop->genome == GENOME_chromatophore);
477 
478   if (plastid) {
479     if (pstdCode > 0) {
480       gencode = pstdCode;
481     } else {
482       gencode = 11;
483     }
484   } else if (mito) {
485     gencode = mitoCode;
486   } else {
487     gencode = nuclCode;
488   }
489 
490   if (gencodep != NULL) {
491     *gencodep = gencode;
492   }
493   if (mitop != NULL) {
494     *mitop = mito;
495   }
496   if (plastidp != NULL) {
497     *plastidp = plastid;
498   }
499   if (taxnamep != NULL && maxsize > 0) {
500     StringNCpy_0 (taxnamep, taxname, maxsize);
501   }
502   if (biopp != NULL) {
503     *biopp = biop;
504   }
505 
506   return TRUE;
507 }
508 
509 
510 typedef struct commontitle {
511   BioseqPtr bsp;
512   SeqDescPtr sdp;
513 } CommonTitleData, PNTR CommonTitlePtr;
514 
515 
CommonTitleNew(BioseqPtr bsp,SeqDescPtr sdp)516 static CommonTitlePtr CommonTitleNew (BioseqPtr bsp, SeqDescPtr sdp)
517 {
518   CommonTitlePtr c = (CommonTitlePtr) MemNew (sizeof (CommonTitleData));
519   c->bsp = bsp;
520   c->sdp = sdp;
521   return c;
522 }
523 
524 
/*
 * Releases a CommonTitleData record; the Bioseq and descriptor it refers to
 * are left alone.  Accepts NULL.  Returns the result of MemFree so callers
 * can assign it back to their pointer.
 */
static CommonTitlePtr CommonTitleFree (CommonTitlePtr c)
{
  if (c == NULL) {
    return NULL;
  }
  return (CommonTitlePtr) MemFree (c);
}
532 
533 
/*
 * Frees a list built by CollectCommonTitle: every node and the
 * CommonTitleData record it carries.  Always returns NULL.
 */
static ValNodePtr CommonTitleListFree (ValNodePtr vnp)
{
  ValNodePtr following;
  ValNodePtr node;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    /* detach first so ValNodeFree releases this node only */
    node->next = NULL;
    node->data.ptrvalue = CommonTitleFree ((CommonTitlePtr) node->data.ptrvalue);
    ValNodeFree (node);
  }
  return NULL;
}
547 
548 
/*
 * Marks for deletion every collected title descriptor whose text equals
 * common_title.  Only descriptors with a non-zero extended field are marked,
 * because the deleteme flag is reached by treating them as ObjValNode.
 * Nothing is unlinked or freed here.
 */
static void RemoveCommonTitles (ValNodePtr vnp, CharPtr common_title)
{
  CommonTitlePtr entry;
  ObjValNodePtr  marked;
  ValNodePtr     node;

  for (node = vnp; node != NULL; node = node->next) {
    entry = node->data.ptrvalue;
    if (StringCmp (entry->sdp->data.ptrvalue, common_title) != 0) continue;
    if (! (entry->sdp->extended > 0)) continue;
    marked = (ObjValNodePtr) entry->sdp;
    marked->idx.deleteme = TRUE;
  }
}
563 
564 
SortCommonTitle(VoidPtr ptr1,VoidPtr ptr2)565 static int LIBCALLBACK SortCommonTitle (VoidPtr ptr1, VoidPtr ptr2)
566 
567 {
568   CommonTitlePtr c1;
569   CommonTitlePtr c2;
570   ValNodePtr  vnp1;
571   ValNodePtr  vnp2;
572 
573   if (ptr1 != NULL && ptr2 != NULL) {
574     vnp1 = *((ValNodePtr PNTR) ptr1);
575     vnp2 = *((ValNodePtr PNTR) ptr2);
576     if (vnp1 != NULL && vnp2 != NULL) {
577       c1 = (CommonTitlePtr) vnp1->data.ptrvalue;
578       c2 = (CommonTitlePtr) vnp2->data.ptrvalue;
579       if (c1 != NULL && c2 != NULL && c1->sdp != NULL && c2->sdp != NULL
580           && c1->sdp->data.ptrvalue != NULL && c2->sdp->data.ptrvalue != NULL) {
581         return StringCmp (c1->sdp->data.ptrvalue, c2->sdp->data.ptrvalue);
582       }
583     }
584   }
585   return 0;
586 }
587 
588 
/*
 * Bioseq visitor: appends one CommonTitleData entry to the ValNode list at
 * *data for every title descriptor found directly on a non-protein Bioseq.
 */
static void CollectCommonTitle (BioseqPtr bsp, Pointer data)
{
  SeqDescPtr cur;

  if (bsp == NULL || data == NULL) return;
  if (ISA_aa (bsp->mol)) return;

  for (cur = bsp->descr; cur != NULL; cur = cur->next) {
    if (cur->choice != Seq_descr_title) continue;
    ValNodeAddPointer ((ValNodePtr PNTR) data, 0, CommonTitleNew (bsp, cur));
  }
}
605 
606 
FindCommonTitleFromList(ValNodePtr list)607 static CharPtr FindCommonTitleFromList (ValNodePtr list)
608 {
609   ValNodePtr vnp;
610   CommonTitlePtr c;
611   Int4 num_common = 0, num_total, num_expected;
612   CharPtr common_title;
613 
614   if (list == NULL) {
615     return NULL;
616   }
617   num_total = ValNodeLen (list);
618   if (num_total % 2 != 0 || num_total < 4) {
619     return NULL;
620   }
621   num_expected = num_total / 2;
622 
623   c = list->data.ptrvalue;
624   common_title = c->sdp->data.ptrvalue;
625   num_common = 1;
626 
627   for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
628     c = (CommonTitlePtr) vnp->data.ptrvalue;
629     if (StringCmp (common_title, c->sdp->data.ptrvalue) == 0) {
630       num_common++;
631     } else {
632       num_common = 1;
633       common_title = c->sdp->data.ptrvalue;
634     }
635   }
636   if (num_common == num_expected) {
637     return StringSave (common_title);
638   }
639 
640   return NULL;
641 }
642 
643 
/*
 * Set visitor: when half of the titles on the nucleotide Bioseqs of a set
 * are identical, adds that title to the set and marks the matching Bioseq
 * titles for deletion.
 *
 * Only sets whose class passes GetsDocsumTitle are considered.  If the
 * number of collected titles equals the number of set members and the set
 * already carries a different title, nothing is promoted: the sequence
 * titles merely happen to be identical.
 *
 * The new descriptor takes ownership of the common title string.  If the
 * descriptor cannot be allocated the string is freed and the set is left
 * unchanged (previously the NULL descriptor was dereferenced).
 *
 * data is unused.
 */
static void PromoteCommonTitlesSetCallback (BioseqSetPtr bssp, Pointer data)
{
  ValNodePtr  list = NULL;
  CharPtr     common_title = NULL;
  SeqDescrPtr sdp;
  Int4        num_member = 0;
  SeqEntryPtr s;
  CharPtr     set_title = NULL;

  if (bssp == NULL || !GetsDocsumTitle (bssp->_class)) {
    return;
  }

  VisitBioseqsInSet (bssp, &list, CollectCommonTitle);
  list = ValNodeSort (list, SortCommonTitle);

  common_title = FindCommonTitleFromList (list);
  if (common_title != NULL) {
    for (s = bssp->seq_set; s != NULL; s = s->next) {
      num_member++;
    }
    if (ValNodeLen (list) == num_member) {
      /* pick up the first title already present on the set, if any */
      for (sdp = bssp->descr; sdp != NULL && set_title == NULL; sdp = sdp->next) {
        if (sdp->choice == Seq_descr_title) {
          set_title = sdp->data.ptrvalue;
        }
      }
      if (set_title != NULL
          && StringCmp (set_title, common_title) != 0) {
        /* don't remove, the seq titles just happen to be identical */
        common_title = MemFree (common_title);
      }
    }
  }
  if (common_title != NULL) {
    sdp = SeqDescrNew (NULL);
    if (sdp == NULL) {
      /* allocation failed: do not leak the string, leave the set as it is */
      common_title = MemFree (common_title);
    } else {
      sdp->choice = Seq_descr_title;
      sdp->data.ptrvalue = common_title;
      sdp->next = bssp->descr;
      bssp->descr = sdp;
      RemoveCommonTitles (list, common_title);
    }
  }
  list = CommonTitleListFree(list);
}
690 
691 
PromoteCommonTitlesToSet(SeqEntryPtr sep)692 NLM_EXTERN void PromoteCommonTitlesToSet (SeqEntryPtr sep)
693 {
694   VisitSetsInSep (sep, NULL, PromoteCommonTitlesSetCallback);
695 }
696 //LCOV_EXCL_STOP
697 
DeleteMultipleTitles(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)698 NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
699 
700 {
701   BioseqPtr     bsp;
702   BioseqSetPtr  bssp;
703   Boolean       hastitle;
704   ValNodePtr    nextsdp;
705   Pointer PNTR  prevsdp;
706   ValNodePtr    sdp;
707 
708   if (IS_Bioseq (sep)) {
709     bsp = (BioseqPtr) sep->data.ptrvalue;
710     sdp = bsp->descr;
711     prevsdp = (Pointer PNTR) &(bsp->descr);
712   } else if (IS_Bioseq_set (sep)) {
713       //LCOV_EXCL_START
714       //cleanup functions only call this during RenormalizeNucProtSets,
715       //and only for Bioseqs
716     bssp = (BioseqSetPtr) sep->data.ptrvalue;
717     sdp = bssp->descr;
718     prevsdp = (Pointer PNTR) &(bssp->descr);
719     //LCOV_EXCL_STOP
720   } else return;
721   hastitle = FALSE;
722   while (sdp != NULL) {
723     nextsdp = sdp->next;
724     if (sdp->choice == Seq_descr_title) {
725       if (hastitle) {
726         //LCOV_EXCL_START
727         //when called from RenormalizeNucProtSets,
728         //extra titles are already gone
729         *(prevsdp) = sdp->next;
730         sdp->next = NULL;
731         SeqDescFree (sdp);
732         //LCOV_EXCL_STOP
733       } else {
734         hastitle = TRUE;
735         prevsdp = (Pointer PNTR) &(sdp->next);
736       }
737     } else {
738       prevsdp = (Pointer PNTR) &(sdp->next);
739     }
740     sdp = nextsdp;
741   }
742 }
743 
RenormalizeNucProtSets(SeqEntryPtr sep,Boolean relink)744 NLM_EXTERN Int4 RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
745 
746 {
747   SeqAnnotPtr    annot;
748   BioseqPtr      bsp;
749   BioseqSetPtr   bssp;
750   ValNodePtr     descr;
751   ObjMgrDataPtr  omdptop;
752   ObjMgrData     omdata;
753   Uint2          parenttype;
754   Pointer        parentptr;
755   SeqAnnotPtr    sap, tmp_sap;
756   SeqEntryPtr    seqentry;
757   Int4           num_renormalized = 0;
758 
759   if (sep == NULL) return 0;
760   if (IS_Bioseq_set (sep)) {
761     bssp = (BioseqSetPtr) sep->data.ptrvalue;
762     if (bssp != NULL && (bssp->_class == 7 ||
763                          (bssp->_class >= 13 && bssp->_class <= 16) ||
764                          bssp->_class == BioseqseqSet_class_wgs_set ||
765                          bssp->_class == BioseqseqSet_class_gen_prod_set ||
766                          bssp->_class == BioseqseqSet_class_small_genome_set)) {
767       for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
768         num_renormalized += RenormalizeNucProtSets (sep, relink);
769       }
770       return num_renormalized;
771     }
772     if (bssp != NULL && bssp->_class == 1) {
773       seqentry = bssp->seq_set;
774       if (seqentry != NULL && seqentry->next == NULL) {
775 
776         if (relink) {
777           SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
778           GetSeqEntryParent (sep, &parentptr, &parenttype);
779         }
780 
781         descr = bssp->descr;
782         bssp->descr = NULL;
783         annot = bssp->annot;
784         bssp->annot = NULL;
785 
786         sep->choice = seqentry->choice;
787         sep->data.ptrvalue = seqentry->data.ptrvalue;
788         seqentry->data.ptrvalue = NULL;
789         bssp->seq_set = NULL;
790         bssp->seqentry = NULL;
791         MemFree (seqentry);
792         BioseqSetFree (bssp);
793 
794         sap = NULL;
795         if (IS_Bioseq (sep)) {
796           bsp = (BioseqPtr) sep->data.ptrvalue;
797           ValNodeLink (&(bsp->descr), descr);
798           if (bsp->annot == NULL) {
799             bsp->annot = annot;
800             annot = NULL;
801           } else {
802             sap = bsp->annot;
803           }
804         } else if (IS_Bioseq_set (sep)) {
805             //LCOV_EXCL_START
806             //should not have set inside nuc-prot set
807           bssp = (BioseqSetPtr) sep->data.ptrvalue;
808           ValNodeLink (&(bssp->descr), descr);
809           if (bssp->annot == NULL) {
810             bssp->annot = annot;
811             annot = NULL;
812           } else {
813             sap = bssp->annot;
814           }
815           //LCOV_EXCL_STOP
816         }
817         if (sap != NULL) {
818           tmp_sap = sap;
819           while (tmp_sap->next != NULL) {
820             tmp_sap = tmp_sap->next;
821           }
822           tmp_sap->next = annot;
823           MergeAdjacentAnnotsInList (sap);
824         }
825 
826         DeleteMultipleTitles (sep, NULL, 0, 0);
827 
828         if (relink) {
829           SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
830           RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
831         }
832         num_renormalized++;
833       }
834     }
835   }
836   return num_renormalized;
837 }
838 
839 
840 //LCOV_EXCL_START
841 //only used by RemoveSingleItemSet, which is not used by cleanup
SetHasAlignments(BioseqSetPtr bssp)842 static Boolean SetHasAlignments (BioseqSetPtr bssp)
843 {
844   SeqAnnotPtr sap;
845   Boolean     rval = FALSE;
846 
847   if (bssp == NULL) {
848     return FALSE;
849   }
850   for (sap = bssp->annot; sap != NULL && !rval; sap = sap->next) {
851     if (sap->type == 2) {
852       rval = TRUE;
853     }
854   }
855   return rval;
856 }
857 
858 
859 //not used by cleanup
RemoveSingleItemSet(SeqEntryPtr sep,Boolean relink)860 NLM_EXTERN Int4 RemoveSingleItemSet(SeqEntryPtr sep, Boolean relink)
861 {
862   SeqAnnotPtr    annot;
863   BioseqPtr      bsp;
864   BioseqSetPtr   bssp;
865   ValNodePtr     descr;
866   ObjMgrDataPtr  omdptop;
867   ObjMgrData     omdata;
868   Uint2          parenttype;
869   Pointer        parentptr;
870   SeqAnnotPtr    sap, tmp_sap;
871   SeqEntryPtr    seqentry, sep_next;
872   Int4           num_renormalized = 0;
873 
874   if (sep == NULL
875       || !IS_Bioseq_set (sep)
876       || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
877     return 0;
878   }
879 
880   if ((bssp->_class == BioseqseqSet_class_pop_set
881        || bssp->_class == BioseqseqSet_class_phy_set
882        || bssp->_class == BioseqseqSet_class_mut_set
883        || bssp->_class == BioseqseqSet_class_eco_set)
884       && bssp->seq_set != NULL
885       && bssp->seq_set->next == NULL
886       && !SetHasAlignments(bssp)) {
887 
888     seqentry = bssp->seq_set;
889 
890     if (relink) {
891       SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
892       GetSeqEntryParent (sep, &parentptr, &parenttype);
893     }
894 
895     descr = bssp->descr;
896     bssp->descr = NULL;
897     annot = bssp->annot;
898     bssp->annot = NULL;
899 
900     sep->choice = seqentry->choice;
901     sep->data.ptrvalue = seqentry->data.ptrvalue;
902     seqentry->data.ptrvalue = NULL;
903     bssp->seq_set = NULL;
904     bssp->seqentry = NULL;
905     MemFree (seqentry);
906     BioseqSetFree (bssp);
907 
908     sap = NULL;
909     if (IS_Bioseq (sep)) {
910       bsp = (BioseqPtr) sep->data.ptrvalue;
911       ValNodeLink (&(bsp->descr), descr);
912       if (bsp->annot == NULL) {
913         bsp->annot = annot;
914         annot = NULL;
915       } else {
916         sap = bsp->annot;
917       }
918     } else if (IS_Bioseq_set (sep)) {
919       bssp = (BioseqSetPtr) sep->data.ptrvalue;
920       ValNodeLink (&(bssp->descr), descr);
921       if (bssp->annot == NULL) {
922         bssp->annot = annot;
923         annot = NULL;
924       } else {
925         sap = bssp->annot;
926       }
927     }
928     if (sap != NULL) {
929       tmp_sap = sap;
930       while (tmp_sap->next != NULL) {
931         tmp_sap = tmp_sap->next;
932       }
933       tmp_sap->next = annot;
934       MergeAdjacentAnnotsInList (sap);
935     }
936 
937     DeleteMultipleTitles (sep, NULL, 0, 0);
938 
939     if (relink) {
940       SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
941       RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
942     }
943     num_renormalized++;
944   } else {
945     for (sep = bssp->seq_set; sep != NULL; sep = sep_next) {
946       sep_next = sep->next;
947       num_renormalized += RemoveSingleItemSet (sep, relink);
948     }
949   }
950 
951   return num_renormalized;
952 }
953 
954 
IsExtractableDescriptor(SeqDescPtr sdp)955 static Boolean IsExtractableDescriptor (SeqDescPtr sdp)
956 {
957   if (sdp == NULL) {
958     return FALSE;
959   }
960   if (sdp->choice == Seq_descr_pub || sdp->choice == Seq_descr_source) {
961     return TRUE;
962   } else if (sdp->choice == Seq_descr_user && IsDBLinkObject(sdp->data.ptrvalue)) {
963     return TRUE;
964   } else {
965     return FALSE;
966   }
967 }
968 
969 
ExtractBioSourceAndPubs(SeqEntryPtr sep)970 NLM_EXTERN ValNodePtr ExtractBioSourceAndPubs (SeqEntryPtr sep)
971 
972 {
973   BioseqPtr     bsp;
974   BioseqSetPtr  bssp;
975   ValNodePtr    descr;
976   ValNodePtr    last;
977   ValNodePtr    nextsdp;
978   Pointer PNTR  prevsdp;
979   ValNodePtr    sdp;
980 
981   if (sep == NULL || sep->data.ptrvalue == NULL) return NULL;
982   descr = NULL;
983   last = NULL;
984   sdp = NULL;
985   if (IS_Bioseq (sep)) {
986     bsp = (BioseqPtr) sep->data.ptrvalue;
987     sdp = bsp->descr;
988     prevsdp = (Pointer PNTR) &(bsp->descr);
989   } else if (IS_Bioseq_set (sep)) {
990     bssp = (BioseqSetPtr) sep->data.ptrvalue;
991     sdp = bssp->descr;
992     prevsdp = (Pointer PNTR) &(bssp->descr);
993   } else return NULL;
994   while (sdp != NULL) {
995     nextsdp = sdp->next;
996     if (IsExtractableDescriptor(sdp)) {
997       *(prevsdp) = sdp->next;
998       sdp->next = NULL;
999       if (descr == NULL) {
1000         descr = sdp;
1001         last = descr;
1002       } else if (last != NULL) {
1003         last->next = sdp;
1004         last = last->next;
1005       }
1006     } else {
1007       prevsdp = (Pointer PNTR) &(sdp->next);
1008     }
1009     sdp = nextsdp;
1010   }
1011   return descr;
1012 }
1013 
ReplaceBioSourceAndPubs(SeqEntryPtr sep,ValNodePtr descr)1014 NLM_EXTERN void ReplaceBioSourceAndPubs (SeqEntryPtr sep, ValNodePtr descr)
1015 
1016 {
1017   BioseqPtr     bsp;
1018   BioseqSetPtr  bssp;
1019   ValNodePtr    last;
1020   Pointer PNTR  prevsdp;
1021   ValNodePtr    sdp;
1022 
1023   if (sep == NULL || descr == NULL) return;
1024   if (IS_Bioseq (sep)) {
1025     bsp = (BioseqPtr) sep->data.ptrvalue;
1026     sdp = bsp->descr;
1027     prevsdp = (Pointer PNTR) &(bsp->descr);
1028   } else if (IS_Bioseq_set (sep)) {
1029     bssp = (BioseqSetPtr) sep->data.ptrvalue;
1030     sdp = bssp->descr;
1031     prevsdp = (Pointer PNTR) &(bssp->descr);
1032   } else return;
1033   last = descr;
1034   while (last->next != NULL) {
1035     last = last->next;
1036   }
1037   last->next = sdp;
1038   *(prevsdp) = descr;
1039 }
1040 
1041 typedef struct targetdata {
1042   BioseqPtr    bsp;
1043   SeqEntryPtr  nps;
1044   Boolean      skipGenProdSet;
1045 } TargetData, PNTR TargetDataPtr;
1046 
ReturnStackToItem(GatherContextPtr gcp)1047 static Boolean ReturnStackToItem (GatherContextPtr gcp)
1048 
1049 {
1050   BioseqSetPtr   bssp;
1051   Int2           i;
1052   Uint2          itemtype;
1053   TargetDataPtr  tdp;
1054 
1055   if (gcp == NULL) return TRUE;
1056   tdp = (TargetDataPtr) gcp->userdata;
1057   if (tdp == NULL) return TRUE;
1058   if (gcp->gatherstack != NULL && gcp->numstack > 0) {
1059     for (i = 0; i < gcp->numstack; i++) {
1060       itemtype = gcp->gatherstack [i].itemtype;
1061       if (itemtype == OBJ_BIOSEQ || itemtype == OBJ_BIOSEQSET) {
1062         tdp->nps = SeqMgrGetSeqEntryForData (gcp->gatherstack [i].thisitem);
1063         if (gcp->gatherstack [i].itemtype == OBJ_BIOSEQSET) {
1064           bssp = (BioseqSetPtr) gcp->gatherstack [i].thisitem;
1065           if (bssp->_class != BioseqseqSet_class_genbank &&
1066               bssp->_class != BioseqseqSet_class_mut_set &&
1067               bssp->_class != BioseqseqSet_class_pop_set &&
1068               bssp->_class != BioseqseqSet_class_phy_set &&
1069               bssp->_class != BioseqseqSet_class_eco_set &&
1070               bssp->_class != BioseqseqSet_class_wgs_set &&
1071               bssp->_class != BioseqseqSet_class_small_genome_set &&
1072               (bssp->_class != BioseqseqSet_class_gen_prod_set ||
1073            (! tdp->skipGenProdSet))) {
1074             return FALSE;
1075           }
1076         } else if (gcp->gatherstack [i].itemtype == OBJ_BIOSEQ) {
1077           return FALSE;
1078         }
1079       }
1080     }
1081   }
1082   return FALSE;
1083 }
1084 
GetStackToTarget(GatherContextPtr gcp)1085 static Boolean GetStackToTarget (GatherContextPtr gcp)
1086 
1087 {
1088   TargetDataPtr  tdp;
1089 
1090   if (gcp == NULL) return TRUE;
1091   tdp = (TargetDataPtr) gcp->userdata;
1092   if (tdp == NULL) return TRUE;
1093   if (gcp->thistype == OBJ_BIOSEQ) {
1094     if (tdp->bsp == (BioseqPtr) gcp->thisitem) {
1095       return ReturnStackToItem (gcp);
1096     }
1097   }
1098   return TRUE;
1099 }
1100 
GetBestTopParentForDataEx(Uint2 entityID,BioseqPtr bsp,Boolean skipGenProdSet)1101 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForDataEx (Uint2 entityID, BioseqPtr bsp, Boolean skipGenProdSet)
1102 
1103 {
1104   BioseqSetPtr  bssp;
1105   BioseqSetPtr  parent;
1106   GatherScope   gs;
1107   TargetData    td;
1108 
1109   td.bsp = bsp;
1110   td.nps = NULL;
1111   td.skipGenProdSet = skipGenProdSet;
1112   if (entityID > 0 && bsp != NULL) {
1113     if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
1114       bssp = (BioseqSetPtr) bsp->idx.parentptr;
1115       if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts && bssp->idx.parenttype == OBJ_BIOSEQSET) {
1116         parent = (BioseqSetPtr) bssp->idx.parentptr;
1117         if (parent != NULL && parent->_class == BioseqseqSet_class_segset) {
1118           bssp = parent;
1119         }
1120       }
1121       if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset && bssp->idx.parenttype == OBJ_BIOSEQSET) {
1122         parent = (BioseqSetPtr) bssp->idx.parentptr;
1123         if (parent != NULL && parent->_class == BioseqseqSet_class_nuc_prot) {
1124           bssp = parent;
1125         }
1126       }
1127       if (bssp != NULL && bssp->seqentry != NULL) {
1128         if (bssp->_class == BioseqseqSet_class_nuc_prot ||
1129             bssp->_class == BioseqseqSet_class_segset ||
1130             bssp->_class == BioseqseqSet_class_parts) {
1131           return bssp->seqentry;
1132         }
1133       }
1134       if (bsp->seqentry != NULL) {
1135         return bsp->seqentry;
1136       }
1137     }
1138     MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
1139     gs.seglevels = 1;
1140     MemSet ((Pointer) (gs.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
1141     gs.ignore[OBJ_BIOSEQ] = FALSE;
1142     gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
1143     GatherEntity (entityID, (Pointer) &td, GetStackToTarget, &gs);
1144   }
1145   return td.nps;
1146 }
1147 
GetBestTopParentForData(Uint2 entityID,BioseqPtr bsp)1148 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForData (Uint2 entityID, BioseqPtr bsp)
1149 
1150 {
1151   return GetBestTopParentForDataEx (entityID, bsp, FALSE);
1152 }
1153 
GetBestTopParentForItemIDEx(Uint2 entityID,Uint4 itemID,Uint2 itemtype,Boolean skipGenProdSet)1154 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemIDEx (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Boolean skipGenProdSet)
1155 
1156 {
1157   TargetData  td;
1158 
1159   td.bsp = NULL;
1160   td.nps = NULL;
1161   td.skipGenProdSet = skipGenProdSet;
1162   if (entityID > 0 && itemID > 0 && itemtype > 0) {
1163     GatherItem (entityID, itemID, itemtype, (Pointer) &td, ReturnStackToItem);
1164   }
1165   return td.nps;
1166 }
1167 
GetBestTopParentForItemID(Uint2 entityID,Uint4 itemID,Uint2 itemtype)1168 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemID (Uint2 entityID, Uint4 itemID, Uint2 itemtype)
1169 
1170 {
1171   return GetBestTopParentForItemIDEx (entityID, itemID, itemtype, FALSE);
1172 }
1173 
GetTopSeqEntryForEntityID(Uint2 entityID)1174 NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID)
1175 
1176 {
1177   ObjMgrDataPtr  omdp;
1178   SeqSubmitPtr   ssp;
1179 
1180   omdp = ObjMgrGetData (entityID);
1181   if (omdp != NULL) {
1182     switch (omdp->datatype) {
1183       case OBJ_SEQSUB :
1184         ssp = (SeqSubmitPtr) omdp->dataptr;
1185         if (ssp != NULL && ssp->datatype == 1) {
1186           return (SeqEntryPtr) ssp->data;
1187         }
1188         break;
1189       case OBJ_BIOSEQ :
1190         return (SeqEntryPtr) omdp->choice;
1191       case OBJ_BIOSEQSET :
1192         return (SeqEntryPtr) omdp->choice;
1193       default :
1194         break;
1195     }
1196   }
1197   return NULL;
1198 }
1199 //LCOV_EXCL_STOP
1200 
CheckSeqLocForPartialEx(SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr,Int4Ptr limptr)1201 NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr)
1202 
1203 {
1204   SeqLocPtr   firstSlp;
1205   IntFuzzPtr  ifp;
1206   SeqLocPtr   lastSlp;
1207   Int4        lim;
1208   Boolean     partial5;
1209   Boolean     partial3;
1210   SeqIntPtr   sip;
1211   SeqLocPtr   slp;
1212   SeqPntPtr   spp;
1213 
1214   partial5 = FALSE;
1215   partial3 = FALSE;
1216   lim = -1;
1217   if (location != NULL) {
1218     firstSlp = NULL;
1219     lastSlp = NULL;
1220     slp = SeqLocFindNext (location, NULL);
1221     while (slp != NULL) {
1222       if (firstSlp == NULL) {
1223         firstSlp = slp;
1224       }
1225       lastSlp = slp;
1226       slp = SeqLocFindNext (location, slp);
1227     }
1228     if (firstSlp != NULL) {
1229       if (firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
1230         sip = (SeqIntPtr) firstSlp->data.ptrvalue;
1231         if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1232           ifp = sip->if_to;
1233           if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1234             partial5 = TRUE;
1235           }
1236         } else {
1237           ifp = sip->if_from;
1238           if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1239             partial5 = TRUE;
1240           }
1241         }
1242       } else if (firstSlp->choice == SEQLOC_PNT && firstSlp->data.ptrvalue != NULL) {
1243         spp = (SeqPntPtr) firstSlp->data.ptrvalue;
1244         if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1245           ifp = spp->fuzz;
1246           if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1247             partial5 = TRUE;
1248           }
1249         } else {
1250           ifp = spp->fuzz;
1251           if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1252             partial5 = TRUE;
1253           }
1254         }
1255         ifp = spp->fuzz;
1256         if (ifp != NULL && ifp->choice == 4) {
1257           lim = ifp->a;
1258         }
1259       }
1260     }
1261     if (lastSlp != NULL) {
1262       if (lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
1263         sip = (SeqIntPtr) lastSlp->data.ptrvalue;
1264         if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1265           ifp = sip->if_from;
1266           if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1267             partial3 = TRUE;
1268           }
1269         } else {
1270           ifp = sip->if_to;
1271           if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1272             partial3 = TRUE;
1273           }
1274         }
1275       } else if (lastSlp->choice == SEQLOC_PNT && lastSlp->data.ptrvalue != NULL) {
1276         spp = (SeqPntPtr) lastSlp->data.ptrvalue;
1277         if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1278           ifp = spp->fuzz;
1279           if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1280             partial3 = TRUE;
1281           }
1282         } else {
1283           ifp = spp->fuzz;
1284           if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1285             partial3 = TRUE;
1286           }
1287         }
1288         ifp = spp->fuzz;
1289         if (ifp != NULL && ifp->choice == 4) {
1290           lim = ifp->a;
1291         }
1292       }
1293     }
1294   }
1295   if (p5ptr != NULL) {
1296     *p5ptr = partial5;
1297   }
1298   if (p3ptr != NULL) {
1299     *p3ptr = partial3;
1300   }
1301   if (limptr != NULL) {
1302     *limptr = lim;
1303   }
1304   return (Boolean) (partial5 || partial3 || lim == 3 || lim == 4);
1305 }
1306 
CheckSeqLocForPartial(SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr)1307 NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr)
1308 
1309 {
1310   return CheckSeqLocForPartialEx (location, p5ptr, p3ptr, NULL);
1311 }
1312 
/* Rewrites a SEQLOC_WHOLE location in place as a SEQLOC_INT spanning
 * 0 .. bsp->length - 1 of the Bioseq that BioseqFind returns for the
 * location's Seq-id.  The interval gets a duplicate of the Bioseq's best
 * id and the Bioseq's strand; the original Seq-id is freed.
 * Leaves slp untouched when it is NULL, not a whole location, has no id,
 * the Bioseq cannot be found, or the interval cannot be allocated. */
static void ConvertWholeToIntLoc (SeqLocPtr slp)
{
  BioseqPtr  bsp;
  SeqIntPtr  interval;

  if (slp == NULL) return;
  if (slp->choice != SEQLOC_WHOLE) return;
  if (slp->data.ptrvalue == NULL) return;

  bsp = BioseqFind (slp->data.ptrvalue);
  if (bsp == NULL) return;

  interval = SeqIntNew ();
  if (interval == NULL) return;

  interval->from = 0;
  interval->to = bsp->length - 1;
  interval->id = SeqIdDup (SeqIdFindBest (bsp->id, 0));
  interval->strand = bsp->strand;

  /* swap the payload: release the old Seq-id, install the interval */
  SeqIdFree (slp->data.ptrvalue);
  slp->data.ptrvalue = interval;
  slp->choice = SEQLOC_INT;
}
1340 
SetSeqLocPartialEx(SeqLocPtr location,Boolean partial5,Boolean partial3,Int4 lim)1341 NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim)
1342 
1343 {
1344   SeqLocPtr   firstSlp;
1345   IntFuzzPtr  ifp;
1346   SeqLocPtr   lastSlp;
1347   SeqIntPtr   sip;
1348   SeqLocPtr   slp;
1349   SeqPntPtr   spp;
1350 
1351   if (location != NULL) {
1352     /* if whole, need to convert to int */
1353     if (partial5 || partial3)
1354     {
1355       ConvertWholeToIntLoc (location);
1356     }
1357 
1358     firstSlp = NULL;
1359     lastSlp = NULL;
1360     slp = SeqLocFindNext (location, NULL);
1361     while (slp != NULL) {
1362       if (firstSlp == NULL) {
1363         firstSlp = slp;
1364       }
1365       lastSlp = slp;
1366       slp = SeqLocFindNext (location, slp);
1367     }
1368     if (firstSlp != NULL) {
1369       if (firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
1370         sip = (SeqIntPtr) firstSlp->data.ptrvalue;
1371         if (partial5) {
1372           ifp = IntFuzzNew ();
1373           if (ifp != NULL) {
1374             ifp->choice = 4;
1375             if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1376               sip->if_to = IntFuzzFree (sip->if_to);
1377               sip->if_to = ifp;
1378               ifp->a = 1;
1379             } else {
1380               sip->if_from = IntFuzzFree (sip->if_from);
1381               sip->if_from = ifp;
1382               ifp->a = 2;
1383             }
1384           }
1385         } else {
1386           if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1387             sip->if_to = IntFuzzFree (sip->if_to);
1388           } else {
1389             sip->if_from = IntFuzzFree (sip->if_from);
1390           }
1391         }
1392       } else if (firstSlp->choice == SEQLOC_PNT && firstSlp->data.ptrvalue != NULL) {
1393         spp = (SeqPntPtr) firstSlp->data.ptrvalue;
1394         if (partial5) {
1395           ifp = IntFuzzNew ();
1396           if (ifp != NULL) {
1397             ifp->choice = 4;
1398             if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1399               spp->fuzz = IntFuzzFree (spp->fuzz);
1400               spp->fuzz = ifp;
1401               ifp->a = 1;
1402             } else {
1403               spp->fuzz = IntFuzzFree (spp->fuzz);
1404               spp->fuzz = ifp;
1405               ifp->a = 2;
1406             }
1407           }
1408         } else if (lim == 3 || lim == 4) {
1409           ifp = IntFuzzNew ();
1410           if (ifp != NULL) {
1411             ifp->choice = 4;
1412             spp->fuzz = IntFuzzFree (spp->fuzz);
1413             spp->fuzz = ifp;
1414             ifp->a = lim;
1415           }
1416         } else {
1417           if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1418             spp->fuzz = IntFuzzFree (spp->fuzz);
1419           } else {
1420             spp->fuzz = IntFuzzFree (spp->fuzz);
1421           }
1422         }
1423       }
1424     }
1425     if (lastSlp != NULL) {
1426       if (lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
1427         sip = (SeqIntPtr) lastSlp->data.ptrvalue;
1428         if (partial3) {
1429           ifp = IntFuzzNew ();
1430           if (ifp != NULL) {
1431             ifp->choice = 4;
1432             if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1433               sip->if_from = IntFuzzFree (sip->if_from);
1434               sip->if_from = ifp;
1435               ifp->a = 2;
1436             } else {
1437               sip->if_to = IntFuzzFree (sip->if_to);
1438               sip->if_to = ifp;
1439               ifp->a = 1;
1440             }
1441           }
1442         } else {
1443           if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1444             sip->if_from = IntFuzzFree (sip->if_from);
1445           } else {
1446             sip->if_to = IntFuzzFree (sip->if_to);
1447           }
1448         }
1449       } else if (lastSlp->choice == SEQLOC_PNT && lastSlp->data.ptrvalue != NULL) {
1450         spp = (SeqPntPtr) lastSlp->data.ptrvalue;
1451         if (partial3) {
1452           ifp = IntFuzzNew ();
1453           if (ifp != NULL) {
1454             ifp->choice = 4;
1455             if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1456               spp->fuzz = IntFuzzFree (spp->fuzz);
1457               spp->fuzz = ifp;
1458               ifp->a = 2;
1459             } else {
1460               spp->fuzz = IntFuzzFree (spp->fuzz);
1461               spp->fuzz = ifp;
1462               ifp->a = 1;
1463             }
1464           }
1465         } else if (lim == 3 || lim == 4) {
1466           ifp = IntFuzzNew ();
1467           if (ifp != NULL) {
1468             ifp->choice = 4;
1469             spp->fuzz = IntFuzzFree (spp->fuzz);
1470             spp->fuzz = ifp;
1471             ifp->a = lim;
1472           }
1473         } else {
1474           if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1475             spp->fuzz = IntFuzzFree (spp->fuzz);
1476           } else {
1477             spp->fuzz = IntFuzzFree (spp->fuzz);
1478           }
1479         }
1480       }
1481     }
1482   }
1483 }
1484 
SetSeqLocPartial(SeqLocPtr location,Boolean partial5,Boolean partial3)1485 NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3)
1486 
1487 {
1488   SetSeqLocPartialEx (location, partial5, partial3, -1);
1489 }
1490 
1491 //LCOV_EXCL_START
GetSeqLocPartialSet(SeqLocPtr location)1492 NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
1493 
1494 {
1495   ValNodePtr  head = NULL, last = NULL, vnp;
1496   Int4        lim;
1497   Boolean     noLeft;
1498   Boolean     noRight;
1499   SeqLocPtr   slp;
1500   Int4        val;
1501 
1502   if (location == NULL) return NULL;
1503 
1504   slp = SeqLocFindNext (location, NULL);
1505   while (slp != NULL) {
1506     CheckSeqLocForPartialEx (slp, &noLeft, &noRight, &lim);
1507     val = 0;
1508     if (noLeft) {
1509       val |= 2;
1510     }
1511     if (noRight) {
1512       val |= 1;
1513     }
1514     if (lim == 3) {
1515       val |= 4;
1516     } else if (lim == 4) {
1517       val |= 8;
1518     }
1519     vnp = ValNodeAddInt (&last, 0, val);
1520     if (head == NULL) {
1521       head = vnp;
1522     }
1523     last = vnp;
1524     slp = SeqLocFindNext (location, slp);
1525   }
1526 
1527   return head;
1528 }
1529 
SetSeqLocPartialSet(SeqLocPtr location,ValNodePtr vnp)1530 NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp)
1531 
1532 {
1533   Int4        lim;
1534   Boolean     noLeft;
1535   Boolean     noRight;
1536   SeqLocPtr   slp;
1537   Int4        val;
1538 
1539   if (location == NULL || vnp == NULL) return;
1540 
1541   slp = SeqLocFindNext (location, NULL);
1542   while (slp != NULL && vnp != NULL) {
1543     val = (Int4) vnp->data.intvalue;
1544     noLeft = (Boolean) ((val & 2) != 0);
1545     noRight = (Boolean) ((val & 1) != 0);
1546     lim = -1;
1547     if ((val & 4) != 0) {
1548       lim = 3;
1549     } else if ((val & 8) != 0) {
1550       lim = 4;
1551     }
1552     SetSeqLocPartialEx (slp, noLeft, noRight, lim);
1553     slp = SeqLocFindNext (location, slp);
1554     vnp = vnp->next;
1555   }
1556 }
1557 
1558 /* KeyTag section */
1559 
SortVnpByString(VoidPtr ptr1,VoidPtr ptr2)1560 NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2)
1561 
1562 {
1563   CharPtr     str1;
1564   CharPtr     str2;
1565   ValNodePtr  vnp1;
1566   ValNodePtr  vnp2;
1567 
1568   if (ptr1 != NULL && ptr2 != NULL) {
1569     vnp1 = *((ValNodePtr PNTR) ptr1);
1570     vnp2 = *((ValNodePtr PNTR) ptr2);
1571     if (vnp1 != NULL && vnp2 != NULL) {
1572       str1 = (CharPtr) vnp1->data.ptrvalue;
1573       str2 = (CharPtr) vnp2->data.ptrvalue;
1574       if (str1 != NULL && str2 != NULL) {
1575         return StringICmp (str1, str2);
1576       }
1577     }
1578   }
1579   return 0;
1580 }
1581 
SortVnpByStringCS(VoidPtr ptr1,VoidPtr ptr2)1582 NLM_EXTERN int LIBCALLBACK SortVnpByStringCS (VoidPtr ptr1, VoidPtr ptr2)
1583 
1584 {
1585   CharPtr     str1;
1586   CharPtr     str2;
1587   ValNodePtr  vnp1;
1588   ValNodePtr  vnp2;
1589 
1590   if (ptr1 != NULL && ptr2 != NULL) {
1591     vnp1 = *((ValNodePtr PNTR) ptr1);
1592     vnp2 = *((ValNodePtr PNTR) ptr2);
1593     if (vnp1 != NULL && vnp2 != NULL) {
1594       str1 = (CharPtr) vnp1->data.ptrvalue;
1595       str2 = (CharPtr) vnp2->data.ptrvalue;
1596       if (str1 != NULL && str2 != NULL) {
1597         return StringCmp (str1, str2);
1598       }
1599     }
1600   }
1601   return 0;
1602 }
1603 
SortVnpByStringCI(VoidPtr ptr1,VoidPtr ptr2)1604 NLM_EXTERN int LIBCALLBACK SortVnpByStringCI (VoidPtr ptr1, VoidPtr ptr2)
1605 
1606 {
1607   CharPtr     str1;
1608   CharPtr     str2;
1609   ValNodePtr  vnp1;
1610   ValNodePtr  vnp2;
1611 
1612   if (ptr1 != NULL && ptr2 != NULL) {
1613     vnp1 = *((ValNodePtr PNTR) ptr1);
1614     vnp2 = *((ValNodePtr PNTR) ptr2);
1615     if (vnp1 != NULL && vnp2 != NULL) {
1616       str1 = (CharPtr) vnp1->data.ptrvalue;
1617       str2 = (CharPtr) vnp2->data.ptrvalue;
1618       if (str1 != NULL && str2 != NULL) {
1619         return StringCmp (str1, str2);
1620       }
1621     }
1622   }
1623   return 0;
1624 }
1625 
SortVnpByStringCIUCFirst(VoidPtr ptr1,VoidPtr ptr2)1626 NLM_EXTERN int LIBCALLBACK SortVnpByStringCIUCFirst (VoidPtr ptr1, VoidPtr ptr2)
1627 
1628 {
1629   int         comp;
1630   CharPtr     str1;
1631   CharPtr     str2;
1632   ValNodePtr  vnp1;
1633   ValNodePtr  vnp2;
1634 
1635   if (ptr1 != NULL && ptr2 != NULL) {
1636     vnp1 = *((ValNodePtr PNTR) ptr1);
1637     vnp2 = *((ValNodePtr PNTR) ptr2);
1638     if (vnp1 != NULL && vnp2 != NULL) {
1639       str1 = (CharPtr) vnp1->data.ptrvalue;
1640       str2 = (CharPtr) vnp2->data.ptrvalue;
1641       if (str1 != NULL && str2 != NULL) {
1642         comp = StringICmp (str1, str2);
1643         if (comp != 0) return comp;
1644         return StringCmp (str1, str2);
1645       }
1646     }
1647   }
1648   return 0;
1649 }
1650 
SortVnpByStringCILCFirst(VoidPtr ptr1,VoidPtr ptr2)1651 NLM_EXTERN int LIBCALLBACK SortVnpByStringCILCFirst (VoidPtr ptr1, VoidPtr ptr2)
1652 
1653 {
1654   int         comp;
1655   CharPtr     str1;
1656   CharPtr     str2;
1657   ValNodePtr  vnp1;
1658   ValNodePtr  vnp2;
1659 
1660   if (ptr1 != NULL && ptr2 != NULL) {
1661     vnp1 = *((ValNodePtr PNTR) ptr1);
1662     vnp2 = *((ValNodePtr PNTR) ptr2);
1663     if (vnp1 != NULL && vnp2 != NULL) {
1664       str1 = (CharPtr) vnp1->data.ptrvalue;
1665       str2 = (CharPtr) vnp2->data.ptrvalue;
1666       if (str1 != NULL && str2 != NULL) {
1667         comp = StringICmp (str1, str2);
1668         if (comp != 0) return comp;
1669         return StringCmp (str2, str1);
1670       }
1671     }
1672   }
1673   return 0;
1674 }
1675 
SortVnpByNaturalCS(VoidPtr ptr1,VoidPtr ptr2)1676 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCS (VoidPtr ptr1, VoidPtr ptr2)
1677 
1678 {
1679   CharPtr     str1, str2;
1680   ValNodePtr  vnp1, vnp2;
1681 
1682   if (ptr1 == NULL || ptr2 == NULL) return 0;
1683 
1684   vnp1 = *((ValNodePtr PNTR) ptr1);
1685   vnp2 = *((ValNodePtr PNTR) ptr2);
1686   if (vnp1 == NULL || vnp2 == NULL) return 0;
1687 
1688   str1 = (CharPtr) vnp1->data.ptrvalue;
1689   str2 = (CharPtr) vnp2->data.ptrvalue;
1690   if (str1 == NULL || str2 == NULL) return 0;
1691 
1692   return NaturalStringCmp (str1, str2);
1693 }
1694 
SortVnpByNaturalCI(VoidPtr ptr1,VoidPtr ptr2)1695 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCI (VoidPtr ptr1, VoidPtr ptr2)
1696 
1697 {
1698   CharPtr     str1, str2;
1699   ValNodePtr  vnp1, vnp2;
1700 
1701   if (ptr1 == NULL || ptr2 == NULL) return 0;
1702 
1703   vnp1 = *((ValNodePtr PNTR) ptr1);
1704   vnp2 = *((ValNodePtr PNTR) ptr2);
1705   if (vnp1 == NULL || vnp2 == NULL) return 0;
1706 
1707   str1 = (CharPtr) vnp1->data.ptrvalue;
1708   str2 = (CharPtr) vnp2->data.ptrvalue;
1709   if (str1 == NULL || str2 == NULL) return 0;
1710 
1711   return NaturalStringICmp (str1, str2);
1712 }
1713 //LCOV_EXCL_STOP
1714 
UniqueValNode(ValNodePtr list)1715 NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list)
1716 
1717 {
1718   CharPtr       last;
1719   ValNodePtr    next;
1720   Pointer PNTR  prev;
1721   CharPtr       str;
1722   ValNodePtr    vnp;
1723 
1724   if (list == NULL) return NULL;
1725   last = (CharPtr) list->data.ptrvalue;
1726   vnp = list->next;
1727   prev = (Pointer PNTR) &(list->next);
1728   while (vnp != NULL) {
1729     next = vnp->next;
1730     str = (CharPtr) vnp->data.ptrvalue;
1731     if (StringICmp (last, str) == 0) {
1732       vnp->next = NULL;
1733       *prev = next;
1734       ValNodeFreeData (vnp);
1735     } else {
1736       last = (CharPtr) vnp->data.ptrvalue;
1737       prev = (Pointer PNTR) &(vnp->next);
1738     }
1739     vnp = next;
1740   }
1741 
1742   return list;
1743 }
1744 
1745 //LCOV_EXCL_START
UniqueStringValNodeCS(ValNodePtr list)1746 NLM_EXTERN ValNodePtr UniqueStringValNodeCS (ValNodePtr list)
1747 
1748 {
1749   CharPtr       last;
1750   ValNodePtr    next;
1751   Pointer PNTR  prev;
1752   CharPtr       str;
1753   ValNodePtr    vnp;
1754 
1755   if (list == NULL) return NULL;
1756   last = (CharPtr) list->data.ptrvalue;
1757   vnp = list->next;
1758   prev = (Pointer PNTR) &(list->next);
1759   while (vnp != NULL) {
1760     next = vnp->next;
1761     str = (CharPtr) vnp->data.ptrvalue;
1762     if (StringCmp (last, str) == 0) {
1763       vnp->next = NULL;
1764       *prev = next;
1765       ValNodeFreeData (vnp);
1766     } else {
1767       last = (CharPtr) vnp->data.ptrvalue;
1768       prev = (Pointer PNTR) &(vnp->next);
1769     }
1770     vnp = next;
1771   }
1772 
1773   return list;
1774 }
1775 
UniqueStringValNodeCI(ValNodePtr list)1776 NLM_EXTERN ValNodePtr UniqueStringValNodeCI (ValNodePtr list)
1777 
1778 {
1779   CharPtr       last;
1780   ValNodePtr    next;
1781   Pointer PNTR  prev;
1782   CharPtr       str;
1783   ValNodePtr    vnp;
1784 
1785   if (list == NULL) return NULL;
1786   last = (CharPtr) list->data.ptrvalue;
1787   vnp = list->next;
1788   prev = (Pointer PNTR) &(list->next);
1789   while (vnp != NULL) {
1790     next = vnp->next;
1791     str = (CharPtr) vnp->data.ptrvalue;
1792     if (StringICmp (last, str) == 0) {
1793       vnp->next = NULL;
1794       *prev = next;
1795       ValNodeFreeData (vnp);
1796     } else {
1797       last = (CharPtr) vnp->data.ptrvalue;
1798       prev = (Pointer PNTR) &(vnp->next);
1799     }
1800     vnp = next;
1801   }
1802 
1803   return list;
1804 }
1805 
SortByChoice(VoidPtr ptr1,VoidPtr ptr2)1806 NLM_EXTERN int LIBCALLBACK SortByChoice (VoidPtr ptr1, VoidPtr ptr2)
1807 
1808 {
1809   Uint1       chs1;
1810   Uint1       chs2;
1811   ValNodePtr  vnp1;
1812   ValNodePtr  vnp2;
1813 
1814   if (ptr1 == NULL || ptr2 == NULL) return 0;
1815   vnp1 = *((ValNodePtr PNTR) ptr1);
1816   vnp2 = *((ValNodePtr PNTR) ptr2);
1817   if (vnp1 == NULL || vnp2 == NULL) return 0;
1818   chs1 = (Uint1) vnp1->choice;
1819   chs2 = (Uint1) vnp2->choice;
1820   if (chs1 > chs2) {
1821     return 1;
1822   } else if (chs1 < chs2) {
1823     return -1;
1824   }
1825   return 0;
1826 }
1827 
SortByIntvalue(VoidPtr ptr1,VoidPtr ptr2)1828 NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2)
1829 
1830 {
1831   Int4        val1;
1832   Int4        val2;
1833   ValNodePtr  vnp1;
1834   ValNodePtr  vnp2;
1835 
1836   if (ptr1 == NULL || ptr2 == NULL) return 0;
1837   vnp1 = *((ValNodePtr PNTR) ptr1);
1838   vnp2 = *((ValNodePtr PNTR) ptr2);
1839   if (vnp1 == NULL || vnp2 == NULL) return 0;
1840   val1 = (Int4) vnp1->data.intvalue;
1841   val2 = (Int4) vnp2->data.intvalue;
1842   if (val1 > val2) {
1843     return 1;
1844   } else if (val1 < val2) {
1845     return -1;
1846   }
1847   return 0;
1848 }
1849 
UniqueIntValNode(ValNodePtr list)1850 NLM_EXTERN ValNodePtr UniqueIntValNode (ValNodePtr list)
1851 
1852 {
1853   Int4          curr, last;
1854   ValNodePtr    next;
1855   Pointer PNTR  prev;
1856   ValNodePtr    vnp;
1857 
1858   if (list == NULL) return NULL;
1859   last = (Int4) list->data.intvalue;
1860   vnp = list->next;
1861   prev = (Pointer PNTR) &(list->next);
1862   while (vnp != NULL) {
1863     next = vnp->next;
1864     curr = (Int4) vnp->data.intvalue;
1865     if (last == curr) {
1866       vnp->next = NULL;
1867       *prev = next;
1868       ValNodeFree (vnp);
1869     } else {
1870       last = (Int4) vnp->data.intvalue;
1871       prev = (Pointer PNTR) &(vnp->next);
1872     }
1873     vnp = next;
1874   }
1875 
1876   return list;
1877 }
1878 
SortByPtrvalue(VoidPtr ptr1,VoidPtr ptr2)1879 NLM_EXTERN int LIBCALLBACK SortByPtrvalue (VoidPtr ptr1, VoidPtr ptr2)
1880 
1881 {
1882   VoidPtr     val1;
1883   VoidPtr     val2;
1884   ValNodePtr  vnp1;
1885   ValNodePtr  vnp2;
1886 
1887   if (ptr1 == NULL || ptr2 == NULL) return 0;
1888   vnp1 = *((ValNodePtr PNTR) ptr1);
1889   vnp2 = *((ValNodePtr PNTR) ptr2);
1890   if (vnp1 == NULL || vnp2 == NULL) return 0;
1891   val1 = (VoidPtr) vnp1->data.ptrvalue;
1892   val2 = (VoidPtr) vnp2->data.ptrvalue;
1893   if (val1 > val2) {
1894     return 1;
1895   } else if (val1 < val2) {
1896     return -1;
1897   }
1898   return 0;
1899 }
1900 
UniquePtrValNode(ValNodePtr list)1901 NLM_EXTERN ValNodePtr UniquePtrValNode (ValNodePtr list)
1902 
1903 {
1904   VoidPtr       curr, last;
1905   ValNodePtr    next;
1906   Pointer PNTR  prev;
1907   ValNodePtr    vnp;
1908 
1909   if (list == NULL) return NULL;
1910   last = (VoidPtr) list->data.ptrvalue;
1911   vnp = list->next;
1912   prev = (Pointer PNTR) &(list->next);
1913   while (vnp != NULL) {
1914     next = vnp->next;
1915     curr = (VoidPtr) vnp->data.ptrvalue;
1916     if (last == curr) {
1917       vnp->next = NULL;
1918       *prev = next;
1919       ValNodeFree (vnp);
1920     } else {
1921       last = (VoidPtr) vnp->data.ptrvalue;
1922       prev = (Pointer PNTR) &(vnp->next);
1923     }
1924     vnp = next;
1925   }
1926 
1927   return list;
1928 }
1929 
KeyTagInit(KeyTag PNTR ktp,ValNodePtr list)1930 NLM_EXTERN void KeyTagInit (KeyTag PNTR ktp, ValNodePtr list)
1931 
1932 {
1933   Int2          i;
1934   CharPtr PNTR  index;
1935   Int2          num;
1936   ValNodePtr    vnp;
1937 
1938   if (ktp == NULL || list == NULL) return;
1939   list = ValNodeSort (list, SortVnpByString);
1940   list = UniqueValNode (list);
1941   num = ValNodeLen (list);
1942   index = MemNew (sizeof (CharPtr) * (num + 1));
1943 
1944   for (vnp = list, i = 0; vnp != NULL && i < num; vnp = vnp->next, i++) {
1945     index [i] = (CharPtr) vnp->data.ptrvalue;
1946   }
1947 
1948   ktp->num = num;
1949   ktp->list = list;
1950   ktp->index = index;
1951 }
1952 
KeyTagClear(KeyTag PNTR ktp)1953 NLM_EXTERN void KeyTagClear (KeyTag PNTR ktp)
1954 
1955 {
1956   if (ktp == NULL) return;
1957   ktp->num = 0;
1958   ktp->list = ValNodeFreeData (ktp->list);
1959   ktp->index = MemFree (ktp->index);
1960 }
1961 
KeyFromTag(KeyTag PNTR ktp,CharPtr tag)1962 NLM_EXTERN Int2 KeyFromTag (KeyTag PNTR ktp, CharPtr tag)
1963 
1964 {
1965   Int2  L, R, mid, compare;
1966 
1967   if (ktp == NULL || ktp->list == NULL || ktp->index == NULL) return 0;
1968   if (tag == NULL) return 0;
1969 
1970   L = 0;
1971   R = ktp->num - 1;
1972   while (L < R) {
1973     mid = (L + R) / 2;
1974     compare = StringICmp (ktp->index [mid], tag);
1975     if (compare < 0) {
1976       L = mid + 1;
1977     } else {
1978       R = mid;
1979     }
1980   }
1981   if (StringICmp (ktp->index [R], tag) == 0) {
1982     return (R + 1);
1983   }
1984 
1985   return 0;
1986 }
1987 
TagFromKey(KeyTag PNTR ktp,Int2 key)1988 NLM_EXTERN CharPtr TagFromKey (KeyTag PNTR ktp, Int2 key)
1989 
1990 {
1991   if (ktp == NULL || ktp->list == NULL || ktp->index == NULL) return 0;
1992   if (key < 1 || key > ktp->num) return 0;
1993   key--;
1994   return ktp->index [key];
1995 }
1996 
1997 /* begin PromoteXrefs section */
1998 
1999 typedef struct geneextendlist {
2000   GeneRefPtr  grp;
2001   SeqLocPtr   slp;
2002   ObjMgrPtr   omp;
2003   Boolean     rsult;
2004   Char        label [41];
2005 } GeneExtendList, PNTR GeneExtendPtr;
2006 
GeneExtendFunc(GatherContextPtr gcp)2007 static Boolean GeneExtendFunc (GatherContextPtr gcp)
2008 
2009 {
2010   BioseqPtr      bsp;
2011   GeneExtendPtr  gep;
2012   GeneRefPtr     grp;
2013   Boolean        hasNulls;
2014   ObjMgrTypePtr  omtp;
2015   SeqFeatPtr     sfp;
2016   SeqLocPtr      slp;
2017   Char           thislabel [41];
2018 
2019   if (gcp == NULL) return TRUE;
2020 
2021   gep = (GeneExtendPtr) gcp->userdata;
2022   if (gep == NULL ) return TRUE;
2023 
2024   thislabel [0] = '\0';
2025 
2026   if (gcp->thistype == OBJ_SEQFEAT) {
2027     sfp = (SeqFeatPtr) gcp->thisitem;
2028     if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && sfp->data.value.ptrvalue != NULL) {
2029       grp = (GeneRefPtr) sfp->data.value.ptrvalue;
2030       omtp = ObjMgrTypeFind (gep->omp, gcp->thistype, NULL, NULL);
2031       if (omtp == NULL) {
2032         return TRUE;
2033       }
2034       if (omtp->labelfunc != NULL) {
2035         (*(omtp->labelfunc)) (gcp->thisitem, thislabel, 40, OM_LABEL_CONTENT);
2036       }
2037       if (thislabel [0] != '\0') {
2038         if (StringICmp (thislabel, gep->label) == 0) {
2039           if (SeqLocCompare (gep->slp, sfp->location) != SLC_NO_MATCH) {
2040             bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
2041             if (bsp != NULL) {
2042               slp = SeqLocMerge (bsp, sfp->location, gep->slp, TRUE, FALSE, FALSE);
2043               if (slp != NULL) {
2044                 sfp->location = SeqLocFree (sfp->location);
2045                 sfp->location = slp;
2046                 if (bsp->repr == Seq_repr_seg) {
2047                   slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
2048                   sfp->location = SeqLocFree (sfp->location);
2049                   sfp->location = slp;
2050                   hasNulls = LocationHasNullsBetween (sfp->location);
2051                   sfp->partial = (sfp->partial || hasNulls);
2052                 }
2053                 FreeAllFuzz (slp);
2054                 gep->rsult = TRUE;
2055               }
2056             }
2057           }
2058           return FALSE;
2059         }
2060       }
2061     }
2062   }
2063   return TRUE;
2064 }
2065 
2066 /*
2067 static Boolean ExtendGene (GeneRefPtr grp, SeqEntryPtr nsep, SeqLocPtr slp)
2068 
2069 {
2070   GeneExtendList  gel;
2071   GatherScope     gs;
2072   ObjMgrTypePtr   omtp;
2073   SeqFeatPtr      sfp;
2074 
2075   if (grp == NULL || nsep == NULL || slp == NULL) return FALSE;
2076   gel.grp = grp;
2077   gel.slp = slp;
2078   gel.omp = ObjMgrGet ();
2079   gel.label [0] = '\0';
2080   gel.rsult = FALSE;
2081   omtp = ObjMgrTypeFind (gel.omp, OBJ_SEQFEAT, NULL, NULL);
2082   if (omtp != NULL && omtp->labelfunc != NULL) {
2083     sfp = SeqFeatNew ();
2084     if (sfp != NULL) {
2085       sfp->data.choice = SEQFEAT_GENE;
2086       sfp->data.value.ptrvalue = (Pointer) grp;
2087       (*(omtp->labelfunc)) ((Pointer) sfp, gel.label, 40, OM_LABEL_CONTENT);
2088       sfp->data.value.ptrvalue = NULL;
2089       SeqFeatFree (sfp);
2090     }
2091   }
2092   MemSet ((Pointer)(&gs), 0, sizeof (GatherScope));
2093   gs.seglevels = 1;
2094   gs.get_feats_location = TRUE;
2095   MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
2096   gs.ignore[OBJ_BIOSEQ] = FALSE;
2097   gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
2098   gs.ignore[OBJ_SEQFEAT] = FALSE;
2099   gs.ignore[OBJ_SEQANNOT] = FALSE;
2100   GatherSeqEntry (nsep, (Pointer) &gel, GeneExtendFunc, &gs);
2101   return gel.rsult;
2102 }
2103 */
2104 
SetEmptyGeneticCodes(SeqAnnotPtr sap,Int2 genCode)2105 NLM_EXTERN void SetEmptyGeneticCodes (SeqAnnotPtr sap, Int2 genCode)
2106 
2107 {
2108   CdRegionPtr     crp;
2109   GeneticCodePtr  gc;
2110   SeqFeatPtr      sfp;
2111   ValNodePtr      vnp;
2112 
2113   if (sap == NULL || sap->type != 1) return;
2114   for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
2115     if (sfp->data.choice == SEQFEAT_CDREGION) {
2116       crp = (CdRegionPtr) sfp->data.value.ptrvalue;
2117       if (crp != NULL) {
2118         gc = crp->genetic_code;
2119         if (gc != NULL) {
2120           vnp = gc->data.ptrvalue;
2121           if (vnp != NULL && vnp->choice == 2) {
2122             vnp->data.intvalue = (Int4) genCode;
2123             /*
2124             if (vnp->data.intvalue == 0) {
2125               vnp->data.intvalue = (Int4) genCode;
2126             }
2127             */
2128           }
2129         }
2130       }
2131     }
2132   }
2133 }
2134 
PromoteXrefsExEx(SeqFeatPtr sfp,BioseqPtr bsp,Uint2 entityID,Boolean include_stop,Boolean remove_trailingX,Boolean gen_prod_set,Boolean force_local_id,BoolPtr seq_fetch_failP)2135 NLM_EXTERN void PromoteXrefsExEx (
2136   SeqFeatPtr sfp,
2137   BioseqPtr bsp,
2138   Uint2 entityID,
2139   Boolean include_stop,
2140   Boolean remove_trailingX,
2141   Boolean gen_prod_set,
2142   Boolean force_local_id,
2143   BoolPtr seq_fetch_failP
2144 )
2145 
2146 {
2147   Int2                 adv;
2148   ByteStorePtr         bs;
2149   BioseqSetPtr         bssp;
2150   Char                 ch;
2151   CharPtr              comment;
2152   CdRegionPtr          crp;
2153   Int2                 ctr = 1;
2154   ValNodePtr           descr;
2155   SeqFeatPtr           first;
2156   GBQualPtr            gbq;
2157   Int4                 i;
2158   Char                 id [128];
2159   SeqEntryPtr          last;
2160   Char                 lcl [128];
2161   BioseqPtr            mbsp;
2162   MolInfoPtr           mip;
2163   SeqEntryPtr          msep;
2164   SeqFeatXrefPtr       next;
2165   GBQualPtr            nextqual;
2166   SeqEntryPtr          old;
2167   ObjMgrDataPtr        omdptop;
2168   ObjMgrData           omdata;
2169   Uint2                parenttype;
2170   Pointer              parentptr;
2171   Boolean              partial5;
2172   Boolean              partial3;
2173   BioseqPtr            pbsp;
2174   SeqFeatXrefPtr PNTR  prev;
2175   GBQualPtr PNTR       prevqual;
2176   SeqFeatPtr           prot;
2177   CharPtr              protseq;
2178   ProtRefPtr           prp, prp2;
2179   SeqEntryPtr          psep;
2180   CharPtr              ptr;
2181   CharPtr              rnaseq;
2182   SeqEntryPtr          sep;
2183   SeqHistPtr           shp;
2184   SeqIdPtr             sip;
2185   SeqEntryPtr          target = NULL;
2186   Uint4                version = 0;
2187   long int             val;
2188   ValNodePtr           vnp;
2189   SeqFeatXrefPtr       xref;
2190   Boolean              ok_to_remove;
2191   /*
2192   DbtagPtr             dbt;
2193   SeqFeatPtr           gene;
2194   GeneRefPtr           grp;
2195   */
2196 
2197   if (seq_fetch_failP != NULL) {
2198     *seq_fetch_failP = FALSE;
2199   }
2200 
2201   if (sfp == NULL || bsp == NULL) return;
2202 
2203   /* set subtypes, used to find mRNA features for genomic product sets */
2204 
2205   first = sfp;
2206   while (sfp != NULL) {
2207     if (sfp->idx.subtype == 0) {
2208       sfp->idx.subtype = FindFeatDefType (sfp);
2209     }
2210     sfp = sfp->next;
2211   }
2212 
2213   /* no longer expand genes specified by qualifiers on other features (except repeat_region) */
2214 
2215   /*
2216   sfp = first;
2217   while (sfp != NULL) {
2218     prev = &(sfp->xref);
2219     xref = sfp->xref;
2220     while (xref != NULL) {
2221       next = xref->next;
2222       if (xref->data.choice == SEQFEAT_GENE &&
2223           sfp->data.choice != SEQFEAT_GENE &&
2224           sfp->idx.subtype != FEATDEF_repeat_region) {
2225         grp = (GeneRefPtr) xref->data.value.ptrvalue;
2226         if (grp != NULL && SeqMgrGeneIsSuppressed (grp)) {
2227         } else {
2228           xref->data.value.ptrvalue = NULL;
2229           if (grp != NULL) {
2230             sep = SeqMgrGetSeqEntryForData (bsp);
2231             if (ExtendGene (grp, sep, sfp->location)) {
2232               GeneRefFree (grp);
2233             } else {
2234               gene = CreateNewFeature (sep, NULL, SEQFEAT_GENE, NULL);
2235               if (gene != NULL) {
2236                 gene->data.value.ptrvalue = (Pointer) grp;
2237                 gene->location = SeqLocFree (gene->location);
2238                 gene->location = AsnIoMemCopy (sfp->location,
2239                                                (AsnReadFunc) SeqLocAsnRead,
2240                                                (AsnWriteFunc) SeqLocAsnWrite);
2241                 for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
2242                   dbt = (DbtagPtr) vnp->data.ptrvalue;
2243                   if (dbt == NULL) continue;
2244                   ValNodeAddPointer (&(gene->dbxref), 0, (Pointer) DbtagDup (dbt));
2245                 }
2246               }
2247             }
2248           }
2249           *(prev) = next;
2250           xref->next = NULL;
2251           xref->data.choice = 0;
2252           SeqFeatXrefFree (xref);
2253         }
2254       } else {
2255         prev = &(xref->next);
2256       }
2257       xref = next;
2258     }
2259     sfp = sfp->next;
2260   }
2261   */
2262 
2263   /* expand mRNA features into cDNA product sequences */
2264 
2265   bssp = NULL;
2266   sep = NULL;
2267   last = NULL;
2268   if (gen_prod_set) {
2269     sep = GetTopSeqEntryForEntityID (entityID);
2270     if (IS_Bioseq_set (sep)) {
2271       bssp = (BioseqSetPtr) sep->data.ptrvalue;
2272       if (bssp != NULL && bssp->seq_set != NULL) {
2273         last = bssp->seq_set;
2274         while (last->next != NULL) {
2275           last = last->next;
2276         }
2277       }
2278     }
2279   }
2280 
2281   if (gen_prod_set && sep != NULL && bssp != NULL && last != NULL) {
2282     target = sep;
2283     SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
2284     GetSeqEntryParent (target, &parentptr, &parenttype);
2285     sfp = first;
2286     while (sfp != NULL) {
2287       if (sfp->data.choice == SEQFEAT_RNA &&
2288           /* sfp->idx.subtype != FEATDEF_tRNA && */
2289           sfp->product == NULL && (! sfp->pseudo)) {
2290         gbq = sfp->qual;
2291         prevqual = (GBQualPtr PNTR) &(sfp->qual);
2292         id [0] = '\0';
2293         sip = NULL;
2294         comment = NULL;
2295         while (gbq != NULL) {
2296           nextqual = gbq->next;
2297           if (StringICmp (gbq->qual, "transcript_id") == 0) {
2298             if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2299               ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2300                          "RNA transcript_id %s replacing %s", gbq->val, id);
2301             }
2302             *(prevqual) = gbq->next;
2303             gbq->next = NULL;
2304             StringNCpy_0 (id, gbq->val, sizeof (id));
2305             GBQualFree (gbq);
2306           } else if (StringICmp (gbq->qual, "comment") == 0 &&
2307                      StringDoesHaveText (gbq->val)) {
2308             *(prevqual) = gbq->next;
2309             gbq->next = NULL;
2310             comment = StringSave (gbq->val);
2311             GBQualFree (gbq);
2312           } else {
2313             prevqual = (GBQualPtr PNTR) &(gbq->next);
2314           }
2315           gbq = nextqual;
2316         }
2317         if (! StringHasNoText (id)) {
2318           if (StringChr (id, '|') != NULL) {
2319             sip = SeqIdParse (id);
2320           } else if (force_local_id) {
2321             sprintf (lcl, "lcl|%s", id);
2322             sip = SeqIdParse (lcl);
2323           } else {
2324             adv = ValidateAccnDotVer (id);
2325             if (adv == 0 || adv == -5) {
2326               ptr = StringChr (id, '.');
2327               if (ptr != NULL) {
2328                 *ptr = '\0';
2329                 ptr++;
2330                 if (sscanf (ptr, "%ld", &val) == 1) {
2331                   version = (Uint4) val;
2332                 }
2333               }
2334               sip = SeqIdFromAccession (id, version, NULL);
2335             } else {
2336               sprintf (lcl, "lcl|%s", id);
2337               sip = SeqIdParse (lcl);
2338             }
2339           }
2340         }
2341         if (sip != NULL || sfp->idx.subtype == FEATDEF_mRNA) {
2342           rnaseq = GetSequenceByFeature (sfp);
2343           if (rnaseq == NULL && seq_fetch_failP != NULL) {
2344             *seq_fetch_failP = TRUE;
2345           }
2346           if (rnaseq != NULL) {
2347             i = (Int4) StringLen (rnaseq);
2348             bs = BSNew (i + 2);
2349             if (bs != NULL) {
2350               BSWrite (bs, (VoidPtr) rnaseq, (Int4) StringLen (rnaseq));
2351               mbsp = BioseqNew ();
2352               if (mbsp != NULL) {
2353                 mbsp->repr = Seq_repr_raw;
2354                 mbsp->mol = Seq_mol_rna;
2355                 mbsp->seq_data_type = Seq_code_iupacna;
2356                 mbsp->seq_data = (SeqDataPtr) bs;
2357                 mbsp->length = BSLen (bs);
2358                 BioseqPack (mbsp);
2359                 bs = NULL;
2360                 /*
2361                 sep = GetTopSeqEntryForEntityID (entityID);
2362                 */
2363                 old = SeqEntrySetScope (sep);
2364                 if (sip != NULL) {
2365                   mbsp->id = sip;
2366                 } else if (sfp->idx.subtype == FEATDEF_mRNA) {
2367                   /* actually just making rapid unique ID for mRNA */
2368                   mbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2369                 }
2370                 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2371                 SeqMgrAddToBioseqIndex (mbsp);
2372                 SeqEntrySetScope (old);
2373                 msep = SeqEntryNew ();
2374                 if (msep != NULL) {
2375                   msep->choice = 1;
2376                   msep->data.ptrvalue = (Pointer) mbsp;
2377                   SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) mbsp, msep);
2378                   mip = MolInfoNew ();
2379                   if (mip != NULL) {
2380                     switch (sfp->idx.subtype) {
2381                       case FEATDEF_preRNA :
2382                         mip->biomol = MOLECULE_TYPE_PRE_MRNA;
2383                         break;
2384                       case FEATDEF_mRNA :
2385                         mip->biomol = MOLECULE_TYPE_MRNA;
2386                         break;
2387                       case FEATDEF_tRNA :
2388                         mip->biomol = MOLECULE_TYPE_TRNA;
2389                         break;
2390                       case FEATDEF_rRNA :
2391                         mip->biomol = MOLECULE_TYPE_RRNA;
2392                         break;
2393                       case FEATDEF_snRNA :
2394                         mip->biomol = MOLECULE_TYPE_SNRNA;
2395                         break;
2396                       case FEATDEF_scRNA :
2397                         mip->biomol = MOLECULE_TYPE_SCRNA;
2398                         break;
2399                       case FEATDEF_otherRNA :
2400                         mip->biomol = MOLECULE_TYPE_TRANSCRIBED_RNA;
2401                         break;
2402                       case FEATDEF_snoRNA :
2403                         mip->biomol = MOLECULE_TYPE_SNORNA;
2404                         break;
2405                       case FEATDEF_ncRNA :
2406                         mip->biomol = MOLECULE_TYPE_NCRNA;
2407                         break;
2408                       case FEATDEF_tmRNA :
2409                         mip->biomol = MOLECULE_TYPE_TMRNA;
2410                         break;
2411                       default :
2412                         mip->biomol = 0;
2413                         break;
2414                     }
2415                     if (partial5 && partial3) {
2416                       mip->completeness = 5;
2417                     } else if (partial5) {
2418                       mip->completeness = 3;
2419                     } else if (partial3) {
2420                       mip->completeness = 4;
2421                     }
2422                     vnp = CreateNewDescriptor (msep, Seq_descr_molinfo);
2423                     if (vnp != NULL) {
2424                       vnp->data.ptrvalue = (Pointer) mip;
2425                     }
2426                   }
2427                   if (comment != NULL) {
2428                     vnp = CreateNewDescriptor (msep, Seq_descr_comment);
2429                     if (vnp != NULL) {
2430                       vnp->data.ptrvalue = (Pointer) comment;
2431                     }
2432                   }
2433                   /* add mRNA sequence to genomic product set */
2434                   last->next = msep;
2435                   last = msep;
2436                   SetSeqFeatProduct (sfp, mbsp);
2437                 }
2438               }
2439             }
2440             rnaseq = MemFree (rnaseq);
2441           }
2442         }
2443       }
2444       sfp = sfp->next;
2445     }
2446     SeqMgrLinkSeqEntry (target, parenttype, parentptr);
2447     RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
2448   }
2449 
2450   /* expand coding region features into protein product sequences */
2451 
2452   last = NULL;
2453   sfp = first;
2454   while (sfp != NULL) {
2455     prev = &(sfp->xref);
2456     xref = sfp->xref;
2457     while (xref != NULL) {
2458       next = xref->next;
2459       if (xref->data.choice == SEQFEAT_PROT &&
2460           sfp->data.choice == SEQFEAT_CDREGION &&
2461           sfp->product == NULL && (! sfp->pseudo)) {
2462         prp = (ProtRefPtr) xref->data.value.ptrvalue;
2463         ok_to_remove = TRUE;
2464         if (prp != NULL) {
2465           crp = (CdRegionPtr) sfp->data.value.ptrvalue;
2466           if (crp != NULL) {
2467 /**
2468             crp->frame = 0;
2469 **/
2470             bs = ProteinFromCdRegionEx (sfp, include_stop, remove_trailingX);
2471             if (bs == NULL && seq_fetch_failP != NULL) {
2472               *seq_fetch_failP = TRUE;
2473             }
2474             if (bs != NULL) {
2475               protseq = BSMerge (bs, NULL);
2476               bs = BSFree (bs);
2477               if (protseq != NULL) {
2478                 ptr = protseq;
2479                 ch = *ptr;
2480                 while (ch != '\0') {
2481                   *ptr = TO_UPPER (ch);
2482                   ptr++;
2483                   ch = *ptr;
2484                 }
2485                 i = (Int4) StringLen (protseq);
2486                 if (i > 0 && protseq [i - 1] == '*') {
2487                   protseq [i - 1] = '\0';
2488                 }
2489                 bs = BSNew (i + 2);
2490                 if (bs != NULL) {
2491                   ptr = protseq;
2492                   /*
2493                   if (protseq [0] == '-') {
2494                     ptr++;
2495                   }
2496                   */
2497                   BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
2498                 }
2499                 protseq = MemFree (protseq);
2500               }
2501               pbsp = BioseqNew ();
2502               if (pbsp != NULL) {
2503                 pbsp->repr = Seq_repr_raw;
2504                 pbsp->mol = Seq_mol_aa;
2505                 pbsp->seq_data_type = Seq_code_ncbieaa;
2506                 pbsp->seq_data = (SeqDataPtr) bs;
2507                 pbsp->length = BSLen (bs);
2508                 bs = NULL;
2509                 sep = NULL;
2510                 mbsp = NULL;
2511                 if (gen_prod_set) {
2512                   gbq = sfp->qual;
2513                   prevqual = (GBQualPtr PNTR) &(sfp->qual);
2514                   id [0] = '\0';
2515                   sip = NULL;
2516                   while (gbq != NULL) {
2517                     nextqual = gbq->next;
2518                     if (StringICmp (gbq->qual, "transcript_id") == 0) {
2519                       if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2520                         ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2521                                    "CDS transcript_id %s replacing %s", gbq->val, id);
2522                       }
2523                       *(prevqual) = gbq->next;
2524                       gbq->next = NULL;
2525                       StringNCpy_0 (id, gbq->val, sizeof (id));
2526                       GBQualFree (gbq);
2527                     } else if (StringICmp (gbq->qual, "secondary_accession") == 0) {
2528                       *(prevqual) = gbq->next;
2529                       gbq->next = NULL;
2530                       shp = ParseStringIntoSeqHist (NULL, gbq->val);
2531                       if (shp != NULL) {
2532                         pbsp->hist = shp;
2533                       }
2534                       GBQualFree (gbq);
2535                     } else {
2536                       prevqual = (GBQualPtr PNTR) &(gbq->next);
2537                     }
2538                     gbq = nextqual;
2539                   }
2540                   if (StringHasNoText (id)) {
2541                     Message (MSG_POSTERR, "No transcript_id on CDS - unable to create nuc-prot set");
2542                   } else {
2543                     if (StringChr (id, '|') != NULL) {
2544                       sip = SeqIdParse (id);
2545                     } else if (force_local_id) {
2546                       sprintf (lcl, "lcl|%s", id);
2547                       sip = SeqIdParse (lcl);
2548                     } else {
2549                       adv = ValidateAccnDotVer (id);
2550                       if (adv == 0 || adv == -5) {
2551                         ptr = StringChr (id, '.');
2552                         if (ptr != NULL) {
2553                           *ptr = '\0';
2554                           ptr++;
2555                           if (sscanf (ptr, "%ld", &val) == 1) {
2556                             version = (Uint4) val;
2557                           }
2558                         }
2559                         sip = SeqIdFromAccession (id, version, NULL);
2560                       } else {
2561                         sprintf (lcl, "lcl|%s", id);
2562                         sip = SeqIdParse (lcl);
2563                       }
2564                     }
2565                   }
2566                   mbsp = BioseqFind (sip);
2567                   SeqIdFree (sip);
2568                   if (mbsp != NULL) {
2569                     sep = SeqMgrGetSeqEntryForData (mbsp);
2570                   /*
2571                   } else {
2572                     sep = GetBestTopParentForDataEx (entityID, bsp, TRUE);
2573                   */
2574                   }
2575                 } else {
2576                   sep = GetBestTopParentForData (entityID, bsp);
2577                 }
2578                 if (sep == NULL) {
2579                   Message (MSG_POSTERR, "No location for nuc-prot set for CDS - unable to create nuc-prot set");
2580                   pbsp = BioseqFree (pbsp);
2581                   ok_to_remove = FALSE;
2582                 } else {
2583                   old = SeqEntrySetScope (sep);
2584                   gbq = sfp->qual;
2585                   prevqual = (GBQualPtr PNTR) &(sfp->qual);
2586                   id [0] = '\0';
2587                   sip = NULL;
2588                   while (gbq != NULL) {
2589                     nextqual = gbq->next;
2590                     if (StringICmp (gbq->qual, "protein_id") == 0) {
2591                       if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2592                                 ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2593                           "CDS protein_id %s replacing %s", gbq->val, id);
2594                       }
2595                       *(prevqual) = gbq->next;
2596                       gbq->next = NULL;
2597                       StringNCpy_0 (id, gbq->val, sizeof (id));
2598                       GBQualFree (gbq);
2599                     } else {
2600                       prevqual = (GBQualPtr PNTR) &(gbq->next);
2601                     }
2602                     gbq = nextqual;
2603                   }
2604                   if (! StringHasNoText (id)) {
2605                     if (StringChr (id, '|') != NULL) {
2606                       sip = SeqIdParse (id);
2607                     } else if (force_local_id) {
2608                       sprintf (lcl, "lcl|%s", id);
2609                       sip = SeqIdParse (lcl);
2610                     } else {
2611                       adv = ValidateAccnDotVer (id);
2612                       if (adv == 0 || adv == -5) {
2613                         ptr = StringChr (id, '.');
2614                         if (ptr != NULL) {
2615                           *ptr = '\0';
2616                           ptr++;
2617                           if (sscanf (ptr, "%ld", &val) == 1) {
2618                             version = (Uint4) val;
2619                           }
2620                         }
2621                         sip = SeqIdFromAccession (id, version, NULL);
2622                       } else {
2623                         sprintf (lcl, "lcl|%s", id);
2624                         sip = SeqIdParse (lcl);
2625                       }
2626                     }
2627                   }
2628                   if (sip != NULL) {
2629                     pbsp->id = sip;
2630                   } else {
2631                     pbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2632                   }
2633                   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2634                   SeqMgrAddToBioseqIndex (pbsp);
2635                   SeqEntrySetScope (old);
2636                   psep = SeqEntryNew ();
2637                   if (psep != NULL) {
2638                     psep->choice = 1;
2639                     psep->data.ptrvalue = (Pointer) pbsp;
2640                     SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) pbsp, psep);
2641                     mip = MolInfoNew ();
2642                     if (mip != NULL) {
2643                       mip->biomol = 8;
2644                       mip->tech = 8;
2645                       if (partial5 && partial3) {
2646                         mip->completeness = 5;
2647                       } else if (partial5) {
2648                         mip->completeness = 3;
2649                       } else if (partial3) {
2650                         mip->completeness = 4;
2651                       }
2652                       vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
2653                       if (vnp != NULL) {
2654                         vnp->data.ptrvalue = (Pointer) mip;
2655                       }
2656                     }
2657                     /* the first protein may change the set/seq structure,
2658                     so goes through AddSeqEntryToSeqEntry */
2659 
2660                     if (gen_prod_set || last == NULL) {
2661                       descr = ExtractBioSourceAndPubs (sep);
2662                       AddSeqEntryToSeqEntry (sep, psep, TRUE);
2663                       ReplaceBioSourceAndPubs (sep, descr);
2664                       last = psep;
2665                     } else {
2666                       last->next = psep;
2667                       last = psep;
2668                     }
2669                     if (target == NULL) {
2670                       target = sep;
2671                       SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
2672                       GetSeqEntryParent (target, &parentptr, &parenttype);
2673                     }
2674                     SetSeqFeatProduct (sfp, pbsp);
2675                     psep = SeqMgrGetSeqEntryForData (pbsp);
2676                     if (psep != NULL) {
2677                       last = psep;
2678                       prot = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
2679                       if (prot != NULL) {
2680                         prot->data.value.ptrvalue = (Pointer) prp;
2681                         SetSeqLocPartial (prot->location, partial5, partial3);
2682                         prot->partial = (Boolean) (partial5 || partial3);
2683                       }
2684                     }
2685                   }
2686                 }
2687               }
2688             }
2689           }
2690         }
2691         if (ok_to_remove) {
2692           xref->data.value.ptrvalue = NULL;
2693           *(prev) = next;
2694           xref->next = NULL;
2695           xref->data.choice = 0;
2696           SeqFeatXrefFree (xref);
2697         } else {
2698           prev = &(xref->next);
2699         }
2700       } else {
2701         prev = &(xref->next);
2702       }
2703       xref = next;
2704     }
2705     sfp = sfp->next;
2706   }
2707 
2708   /* expand mat_peptide features with protein_id qualifiers into protein product sequences */
2709 
2710   last = NULL;
2711   sfp = first;
2712   while (sfp != NULL) {
2713     if (sfp->data.choice == SEQFEAT_PROT && sfp->product == NULL) {
2714       prp = (ProtRefPtr) sfp->data.value.ptrvalue;
2715       gbq = sfp->qual;
2716       prevqual = (GBQualPtr PNTR) &(sfp->qual);
2717       id [0] = '\0';
2718       sip = NULL;
2719       while (gbq != NULL) {
2720         nextqual = gbq->next;
2721         if (StringICmp (gbq->qual, "protein_id") == 0) {
2722           if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2723             ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2724                        "Protein protein_id %s replacing %s",
2725                        gbq->val, id);
2726           }
2727           *(prevqual) = gbq->next;
2728           gbq->next = NULL;
2729           StringNCpy_0 (id, gbq->val, sizeof (id));
2730           GBQualFree (gbq);
2731         } else {
2732           prevqual = (GBQualPtr PNTR) &(gbq->next);
2733         }
2734         gbq = nextqual;
2735       }
2736       if (! StringHasNoText (id)) {
2737         if (StringChr (id, '|') != NULL) {
2738           sip = SeqIdParse (id);
2739         } else if (force_local_id) {
2740           sprintf (lcl, "lcl|%s", id);
2741           sip = SeqIdParse (lcl);
2742         } else {
2743           adv = ValidateAccnDotVer (id);
2744           if (adv == 0 || adv == -5) {
2745             ptr = StringChr (id, '.');
2746             if (ptr != NULL) {
2747               *ptr = '\0';
2748               ptr++;
2749               if (sscanf (ptr, "%ld", &val) == 1) {
2750                 version = (Uint4) val;
2751               }
2752             }
2753             sip = SeqIdFromAccession (id, version, NULL);
2754           } else {
2755             sprintf (lcl, "lcl|%s", id);
2756             sip = SeqIdParse (lcl);
2757           }
2758         }
2759       }
2760       if (sip != NULL) {
2761         protseq = GetSequenceByFeature (sfp);
2762         if (protseq == NULL && seq_fetch_failP != NULL) {
2763           *seq_fetch_failP = TRUE;
2764         }
2765         if (protseq != NULL) {
2766           i = (Int4) StringLen (protseq);
2767           bs = BSNew (i + 2);
2768           if (bs != NULL) {
2769             BSWrite (bs, (VoidPtr) protseq, (Int4) StringLen (protseq));
2770             pbsp = BioseqNew ();
2771             if (pbsp != NULL) {
2772               pbsp->repr = Seq_repr_raw;
2773               pbsp->mol = Seq_mol_aa;
2774               pbsp->seq_data_type = Seq_code_ncbieaa;
2775               pbsp->seq_data = (SeqDataPtr) bs;
2776               pbsp->length = BSLen (bs);
2777               bs = NULL;
2778               /*
2779               sep = GetTopSeqEntryForEntityID (entityID);
2780               */
2781               sep = GetBestTopParentForData (entityID, bsp);
2782               old = SeqEntrySetScope (sep);
2783               if (sip != NULL) {
2784                 pbsp->id = sip;
2785               } else {
2786                 pbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2787               }
2788               CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2789               SeqMgrAddToBioseqIndex (pbsp);
2790               SeqEntrySetScope (old);
2791               psep = SeqEntryNew ();
2792               if (psep != NULL) {
2793                 psep->choice = 1;
2794                 psep->data.ptrvalue = (Pointer) pbsp;
2795                 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) pbsp, psep);
2796                 mip = MolInfoNew ();
2797                 if (mip != NULL) {
2798                   mip->biomol = MOLECULE_TYPE_PEPTIDE;
2799                   if (partial5 && partial3) {
2800                     mip->completeness = 5;
2801                   } else if (partial5) {
2802                     mip->completeness = 3;
2803                   } else if (partial3) {
2804                     mip->completeness = 4;
2805                   }
2806                   vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
2807                   if (vnp != NULL) {
2808                     vnp->data.ptrvalue = (Pointer) mip;
2809                   }
2810                 }
2811                 if (last == NULL) {
2812                   AddSeqEntryToSeqEntry (sep, psep, TRUE);
2813                   last = psep;
2814                 } else {
2815                   last->next = psep;
2816                   last = psep;
2817                 }
2818                 SetSeqFeatProduct (sfp, pbsp);
2819                 if (prp != NULL) {
2820                   prp2 = AsnIoMemCopy ((Pointer) prp,
2821                                        (AsnReadFunc) ProtRefAsnRead,
2822                                        (AsnWriteFunc) ProtRefAsnWrite);
2823                   if (prp2 != NULL) {
2824                     psep = SeqMgrGetSeqEntryForData (pbsp);
2825                     if (psep != NULL) {
2826                       prot = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
2827                       if (prot != NULL) {
2828                         prot->data.value.ptrvalue = prp2;
2829                         SetSeqLocPartial (prot->location, partial5, partial3);
2830                         prot->partial = (Boolean) (partial5 || partial3);
2831                       }
2832                     }
2833                   }
2834                 }
2835               }
2836             }
2837           }
2838           protseq = MemFree (protseq);
2839         }
2840       }
2841     }
2842     sfp = sfp->next;
2843   }
2844 
2845   if (target != NULL) {
2846     SeqMgrLinkSeqEntry (target, parenttype, parentptr);
2847     RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
2848   }
2849 }
2850 
/*
 * PromoteXrefsEx
 *
 * Convenience wrapper around PromoteXrefsExEx.  All six arguments are
 * forwarded unchanged; the two trailing arguments of PromoteXrefsExEx
 * are supplied as FALSE and NULL.
 * NOTE(review): the meaning of those two trailing arguments is defined by
 * PromoteXrefsExEx, whose signature is not visible here - confirm there.
 */
NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop,
                                Boolean remove_trailingX, Boolean gen_prod_set)

{
  PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, FALSE, NULL);
}
2857 
/*
 * PromoteXrefs
 *
 * Simplest entry point: forwards sfp, bsp and entityID to PromoteXrefsExEx
 * with fixed settings.  In the argument positions that PromoteXrefsEx uses
 * for include_stop, remove_trailingX and gen_prod_set it passes TRUE, FALSE
 * and FALSE respectively; the two trailing arguments are FALSE and NULL.
 */
NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID)

{
  PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE, NULL);
}
2863 //LCOV_EXCL_STOP
2864 
2865 /* begin BasicSeqEntryCleanup section */
2866 
HasNoText(CharPtr str)2867 static Boolean HasNoText (CharPtr str)
2868 
2869 {
2870   Uchar  ch;    /* to use 8bit characters in multibyte languages */
2871 
2872   if (str != NULL) {
2873     ch = *str;
2874     while (ch != '\0') {
2875       if (ch > ' ') {
2876         return FALSE;
2877       }
2878       str++;
2879       ch = *str;
2880     }
2881   }
2882   return TRUE;
2883 }
2884 
AlreadyInVnpList(ValNodePtr head,ValNodePtr curr)2885 static Boolean AlreadyInVnpList (ValNodePtr head, ValNodePtr curr)
2886 
2887 {
2888   if (head == NULL || curr == NULL) return FALSE;
2889   /* since we cannot sort these lists, must check against all previous entries */
2890   while (head != curr && head != NULL) {
2891     if (StringICmp (head->data.ptrvalue, curr->data.ptrvalue) == 0) return TRUE;
2892     head = head->next;
2893   }
2894   return FALSE;
2895 }
2896 
2897 //LCOV_EXCL_START
TrimSpacesAndSemicolons(CharPtr str)2898 NLM_EXTERN CharPtr TrimSpacesAndSemicolons (CharPtr str)
2899 
2900 {
2901   CharPtr  amp;
2902   Uchar    ch;    /* to use 8bit characters in multibyte languages */
2903   CharPtr  dst;
2904   CharPtr  ptr;
2905 
2906   if (str != NULL && str [0] != '\0') {
2907     dst = str;
2908     ptr = str;
2909     ch = *ptr;
2910     if (ch != '\0' && (ch <= ' ' || ch == ';')) {
2911       while (ch != '\0' && (ch <= ' ' || ch == ';')) {
2912         ptr++;
2913         ch = *ptr;
2914       }
2915       while (ch != '\0') {
2916         *dst = ch;
2917         dst++;
2918         ptr++;
2919         ch = *ptr;
2920       }
2921       *dst = '\0';
2922     }
2923     amp = NULL;
2924     dst = NULL;
2925     ptr = str;
2926     ch = *ptr;
2927     while (ch != '\0') {
2928       if (ch == '&') {
2929         amp = ptr;
2930         dst = NULL;
2931       } else if (ch <= ' ') {
2932         if (dst == NULL) {
2933           dst = ptr;
2934         }
2935         amp = NULL;
2936       } else if (ch == ';') {
2937         if (dst == NULL && amp == NULL) {
2938           dst = ptr;
2939         }
2940       } else {
2941         dst = NULL;
2942       }
2943       ptr++;
2944       ch = *ptr;
2945     }
2946     if (dst != NULL) {
2947       *dst = '\0';
2948     }
2949   }
2950   return str;
2951 }
2952 //LCOV_EXCL_STOP
2953 
TrimSpacesAndJunkFromEnds(CharPtr str,Boolean allowEllipsis)2954 NLM_EXTERN CharPtr TrimSpacesAndJunkFromEnds (
2955   CharPtr str,
2956   Boolean allowEllipsis
2957 )
2958 
2959 {
2960   Uchar    ch;    /* to use 8bit characters in multibyte languages */
2961   CharPtr  dst;
2962   Boolean  isPeriod;
2963   Boolean  isTilde;
2964   CharPtr  ptr;
2965 
2966   if (str != NULL && str [0] != '\0') {
2967     dst = str;
2968     ptr = str;
2969     ch = *ptr;
2970     if (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
2971       while (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
2972         ptr++;
2973         ch = *ptr;
2974       }
2975       while (ch != '\0') {
2976         *dst = ch;
2977         dst++;
2978         ptr++;
2979         ch = *ptr;
2980       }
2981       *dst = '\0';
2982     }
2983     dst = NULL;
2984     ptr = str;
2985     ch = *ptr;
2986     isPeriod = FALSE;
2987     isTilde = FALSE;
2988     while (ch != '\0') {
2989       if (ch <= ' ' || ch == '.' || ch == ',' || ch == '~' || ch == ';') {
2990         if (dst == NULL) {
2991           dst = ptr;
2992         }
2993         isPeriod = (Boolean) (isPeriod || ch == '.');
2994         isTilde = (Boolean) (isTilde || ch == '~');
2995       } else {
2996         dst = NULL;
2997         isPeriod = FALSE;
2998         isTilde = FALSE;
2999       }
3000       ptr++;
3001       ch = *ptr;
3002     }
3003     if (dst != NULL) {
3004       /* allow one period at end */
3005       if (isPeriod) {
3006         *dst = '.';
3007         dst++;
3008         /* ellipsis are now okay */
3009         if (allowEllipsis && *dst == '.' && dst [1] == '.') {
3010           dst += 2;
3011         }
3012       } else if (isTilde) {
3013         /* allow double tilde at end */
3014         if (*dst == '~' && dst [1] == '~') {
3015           dst += 2;
3016         }
3017       }
3018       *dst = '\0';
3019     }
3020   }
3021   return str;
3022 }
3023 
/*
 * TrimSpacesSemicolonsAndCommas
 *
 * Edits str in place and returns it.
 *   - Leading characters that are <= ' ', ';' or ',' are removed.
 *   - The string is cut at the start of the final run of trailing blanks,
 *     commas and semicolons.  A ';' does not start a trailing run while an
 *     '&' has been seen without an intervening blank or comma, so the
 *     semicolon closing something like "&amp;" is kept.
 * NULL and empty strings are returned untouched.
 */
static CharPtr TrimSpacesSemicolonsAndCommas (CharPtr str)

{
  Uchar    c;    /* unsigned, so 8-bit characters are never treated as blanks */
  CharPtr  cut = NULL;
  CharPtr  entity = NULL;
  CharPtr  out;
  CharPtr  scan;

  if (str == NULL || str [0] == '\0') return str;

  /* skip the leading junk, then slide the rest to the front */
  scan = str;
  c = *scan;
  while (c != '\0' && (c <= ' ' || c == ';' || c == ',')) {
    c = *(++scan);
  }
  if (scan != str) {
    out = str;
    while (*scan != '\0') {
      *out++ = *scan++;
    }
    *out = '\0';
  }

  /* locate the start of the trailing junk run */
  for (scan = str; *scan != '\0'; scan++) {
    c = *scan;
    if (c == '&') {
      entity = scan;
      cut = NULL;
    } else if (c <= ' ' || c == ',') {
      /* blanks and commas behave alike: extend the run, forget any '&' */
      if (cut == NULL) cut = scan;
      entity = NULL;
    } else if (c == ';') {
      if (cut == NULL && entity == NULL) cut = scan;
    } else {
      cut = NULL;
    }
  }

  if (cut != NULL) *cut = '\0';
  return str;
}
3083 
/*
 * TrimFlankingQuotes
 *
 * While the first and last characters of str are a matching pair of
 * double quotes or a matching pair of single quotes, both are overwritten
 * with blanks and TrimSpacesAroundString is applied.  Stops when the ends
 * no longer match or the string becomes empty.  Edits in place; returns str.
 */
static CharPtr TrimFlankingQuotes (CharPtr str)

{
  Char    first;
  size_t  n;

  if (str == NULL) return str;

  for (n = StringLen (str); n > 0; n = StringLen (str)) {
    first = str [0];
    if (first != '"' && first != '\'') break;
    if (str [n - 1] != first) break;
    str [0] = ' ';
    str [n - 1] = ' ';
    TrimSpacesAroundString (str);
  }
  return str;
}
3107 
/*
 * RemoveFlankingQuotes
 *
 * Applies TrimFlankingQuotes to *strp; if nothing but blanks is left the
 * string is freed and *strp is set to the value MemFree returns.
 * Does nothing when strp or *strp is NULL.
 */
static void RemoveFlankingQuotes (CharPtr PNTR strp)

{
  if (strp == NULL || *strp == NULL) return;

  TrimFlankingQuotes (*strp);
  if (! HasNoText (*strp)) return;

  *strp = MemFree (*strp);
}
3118 
/*
 * RemoveFlankingQuotesList
 *
 * Walks the string list at *vnpp, stripping flanking quotes from each
 * entry.  An entry that ends up blank, or that duplicates an earlier
 * entry (case-insensitive), is unlinked and freed with ValNodeFreeData.
 * *vnpp is updated if the head is removed.
 */
static void RemoveFlankingQuotesList (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimFlankingQuotes (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      /* splice the node out; link keeps pointing at the same slot */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3142 
/*
 * CleanVisString
 *
 * Trims blanks, semicolons and commas from both ends of *strp; if the
 * result holds no text the string is freed and *strp takes the value
 * returned by MemFree.  Does nothing when strp or *strp is NULL.
 */
static void CleanVisString (CharPtr PNTR strp)

{
  if (strp == NULL || *strp == NULL) return;

  TrimSpacesSemicolonsAndCommas (*strp);
  if (! HasNoText (*strp)) return;

  *strp = MemFree (*strp);
}
3153 
/*
 * CleanVisStringAndCompress
 *
 * Trims blanks, semicolons and commas from both ends of *strp, then runs
 * Asn2gnbkCompressSpaces on it.  If the result holds no text the string
 * is freed and *strp takes the value returned by MemFree.
 * Does nothing when strp or *strp is NULL.
 */
static void CleanVisStringAndCompress (CharPtr PNTR strp)

{
  if (strp == NULL || *strp == NULL) return;

  TrimSpacesSemicolonsAndCommas (*strp);
  Asn2gnbkCompressSpaces (*strp);
  if (! HasNoText (*strp)) return;

  *strp = MemFree (*strp);
}
3165 
/*
 * CleanVisStringJunk
 *
 * Runs TrimSpacesAndJunkFromEnds (with ellipsis allowed) on *strp; if the
 * result holds no text the string is freed and *strp takes the value
 * returned by MemFree.  Does nothing when strp or *strp is NULL.
 */
static void CleanVisStringJunk (CharPtr PNTR strp)

{
  if (strp == NULL || *strp == NULL) return;

  TrimSpacesAndJunkFromEnds (*strp, TRUE);
  if (! HasNoText (*strp)) return;

  *strp = MemFree (*strp);
}
3176 
/*
 * CleanVisStringJunkAndCompress
 *
 * Runs TrimSpacesAndJunkFromEnds (with ellipsis allowed) and then
 * Asn2gnbkCompressSpaces on *strp.  If the result holds no text the
 * string is freed and *strp takes the value returned by MemFree.
 * Does nothing when strp or *strp is NULL.
 */
static void CleanVisStringJunkAndCompress (CharPtr PNTR strp)

{
  if (strp == NULL || *strp == NULL) return;

  TrimSpacesAndJunkFromEnds (*strp, TRUE);
  Asn2gnbkCompressSpaces (*strp);
  if (! HasNoText (*strp)) return;

  *strp = MemFree (*strp);
}
3188 
/*
 * CleanDoubleQuote
 *
 * Replaces every double-quote character in str with a single quote,
 * in place.  A NULL str is ignored.
 */
static void CleanDoubleQuote (CharPtr str)

{
  CharPtr  p;

  if (str == NULL) return;

  for (p = str; *p != '\0'; p++) {
    if (*p == '"') {
      *p = '\'';
    }
  }
}
3204 
/*
 * RemoveSpacesBetweenTildes
 *
 * Compacts str in place so that blanks/control characters lying between
 * two tildes are removed ("~  ~" becomes "~~").  After each '~' the code
 * looks ahead past characters <= ' '; if the next character is another
 * '~' the skipped characters are dropped, otherwise they are kept.
 * Returns str (unchanged for NULL or empty input).
 *
 * The working character is unsigned, like the other trimming routines in
 * this file, so that 8-bit characters (>= 0x80) are never mistaken for
 * blanks on platforms where plain char is signed.
 */
static CharPtr RemoveSpacesBetweenTildes (CharPtr str)

{
  Uchar    ch;    /* to use 8bit characters in multibyte languages */
  CharPtr  dst;
  CharPtr  ptr;
  CharPtr  tmp;

  if (str == NULL || str [0] == '\0') return str;

  dst = str;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    *dst = *ptr;
    dst++;
    ptr++;
    if (ch == '~') {
      /* look ahead: only skip the blanks if another tilde follows them */
      tmp = ptr;
      ch = *tmp;
      while (ch != '\0' && ch <= ' ') {
        tmp++;
        ch = *tmp;
      }
      if (ch == '~') {
        ptr = tmp;
      }
    }
    ch = *ptr;
  }
  *dst = '\0';

  return str;
}
3239 
/*
 * CleanVisStringList
 *
 * Trims blanks, semicolons and commas from every string in the list at
 * *vnpp.  An entry that ends up blank, or that duplicates an earlier
 * entry (case-insensitive), is unlinked and freed with ValNodeFreeData.
 * *vnpp is updated if the head is removed.
 */
static void CleanVisStringList (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      /* splice the node out; link keeps pointing at the same slot */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3263 
/*
 * CleanVisStringJunkListAndCompress
 *
 * For every string in the list at *vnpp: trims blanks, semicolons and
 * commas, then trims junk from the ends (ellipsis allowed), then runs
 * Asn2gnbkCompressSpaces.  An entry that ends up blank, or that
 * duplicates an earlier entry (case-insensitive), is unlinked and freed
 * with ValNodeFreeData.  *vnpp is updated if the head is removed.
 */
static void CleanVisStringJunkListAndCompress (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    TrimSpacesAndJunkFromEnds (node->data.ptrvalue, TRUE);
    Asn2gnbkCompressSpaces (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      /* splice the node out; link keeps pointing at the same slot */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3289 
/*
 * CleanVisStringListAndCompress
 *
 * For every string in the list at *vnpp: trims blanks, semicolons and
 * commas, then runs Asn2gnbkCompressSpaces.  An entry that ends up blank,
 * or that duplicates an earlier entry (case-insensitive), is unlinked and
 * freed with ValNodeFreeData.  *vnpp is updated if the head is removed.
 */
static void CleanVisStringListAndCompress (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    Asn2gnbkCompressSpaces (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      /* splice the node out; link keeps pointing at the same slot */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3314 
AlreadyInVnpListCaseSensitive(ValNodePtr head,ValNodePtr curr)3315 static Boolean AlreadyInVnpListCaseSensitive (ValNodePtr head, ValNodePtr curr)
3316 
3317 {
3318   if (head == NULL || curr == NULL) return FALSE;
3319   /* since we cannot sort these lists, must check against all previous entries */
3320   while (head != curr && head != NULL) {
3321     if (StringCmp (head->data.ptrvalue, curr->data.ptrvalue) == 0) return TRUE;
3322     head = head->next;
3323   }
3324   return FALSE;
3325 }
3326 
/*
 * CleanVisStringListCaseSensitive
 *
 * Trims blanks, semicolons and commas from every string in the list at
 * *vnpp.  An entry that ends up blank, or that exactly duplicates an
 * earlier entry (case-sensitive), is unlinked and freed with
 * ValNodeFreeData.  *vnpp is updated if the head is removed.
 */
static void CleanVisStringListCaseSensitive (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpListCaseSensitive (*vnpp, node)) {
      /* splice the node out; link keeps pointing at the same slot */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3350 
/*
 * CleanDoubleQuoteList
 *
 * Applies CleanDoubleQuote to the string held by every node of the list.
 */
static void CleanDoubleQuoteList (ValNodePtr vnp)

{
  ValNodePtr  node;

  for (node = vnp; node != NULL; node = node->next) {
    CleanDoubleQuote ((CharPtr) node->data.ptrvalue);
  }
}
3359 
HandledGBQualOnGene(SeqFeatPtr sfp,GBQualPtr gbq)3360 static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
3361 
3362 {
3363   Int2        choice = 0;
3364   GeneRefPtr  grp;
3365 
3366   if (StringICmp (gbq->qual, "map") == 0) {
3367     choice = 2;
3368   } else if (StringICmp (gbq->qual, "allele") == 0) {
3369     choice = 3;
3370   } else if (StringICmp (gbq->qual, "locus_tag") == 0) {
3371     choice = 4;
3372   } else if (StringICmp (gbq->qual, "old_locus_tag") == 0) {
3373     choice = 5;
3374   } else if (StringICmp (gbq->qual, "gene_synonym") == 0) {
3375     choice = 6;
3376   }
3377   if (choice > 0) {
3378     grp = (GeneRefPtr) sfp->data.value.ptrvalue;
3379     if (grp == NULL) return FALSE;
3380     switch (choice) {
3381       case 2 :
3382         if (grp->maploc != NULL) return FALSE;
3383         if (StringHasNoText (gbq->val)) return FALSE;
3384         grp->maploc = StringSave (gbq->val);
3385         break;
3386       case 3 :
3387         if (StringHasNoText (gbq->val)) return FALSE;
3388         if (grp->allele != NULL) {
3389           if (StringICmp (gbq->val, grp->allele) == 0) return TRUE;
3390           return FALSE;
3391         }
3392         grp->allele = StringSave (gbq->val);
3393         break;
3394       case 4 :
3395         if (grp->locus_tag != NULL) return FALSE;
3396         if (StringHasNoText (gbq->val)) return FALSE;
3397         grp->locus_tag = StringSave (gbq->val);
3398         break;
3399       case 5 :
3400 /* removed by indexer request */
3401 /*        if (StringHasNoText (gbq->val)) return FALSE;
3402  *       if (grp->locus_tag != NULL) {
3403  *         if (StringICmp (gbq->val, grp->locus_tag) == 0) return TRUE;
3404  *         return FALSE;
3405  *       }
3406  */
3407         return FALSE;
3408         break;
3409       case 6 :
3410         if (StringHasNoText (gbq->val)) return FALSE;
3411         ValNodeCopyStr (&(grp->syn), 0, gbq->val);
3412       default :
3413         break;
3414     }
3415     return TRUE;
3416   }
3417   return FALSE;
3418 }
3419 
3420 /* code break parser functions from the flatfile parser */
3421 
GetQualValueAa(CharPtr qval)3422 static Uint1 GetQualValueAa (CharPtr qval)
3423 
3424 {
3425    CharPtr  str, eptr, ptr;
3426    Uint1    aa;
3427 
3428     str = StringStr(qval, "aa:");
3429     if (str != NULL) {
3430         str += 3;
3431     } else {
3432         ErrPostEx (SEV_WARNING, ERR_QUALIFIER_InvalidDataFormat,
3433                    "bad transl_except %s", qval);
3434         str = StringStr(qval, ",");
3435         if (str != NULL) {
3436             str = StringStr(str, ":");
3437             if (str != NULL) {
3438               str++;
3439             }
3440         }
3441     }
3442 
3443     if (str == NULL) return (Uint1) 'X';
3444 
3445        while (*str == ' ')
3446            ++str;
3447        for (eptr = str; *eptr != ')' && *eptr != ' ' && *eptr != '\0';  eptr++) continue;
3448 
3449     ptr = TextSave(str, eptr-str);
3450     aa = ValidAminoAcid(ptr);
3451     MemFree(ptr);
3452 
3453     return (aa);
3454 }
3455 
/*
 * SimpleValuePos
 *
 * Returns a newly saved copy (via TextSave) of the location text that
 * follows "(pos:" in qval.  Leading blanks are skipped.  The text ends
 * just before ",aa:" when that marker is present, otherwise just before
 * the first ',' or at end of string.  Returns NULL when "(pos:" is absent.
 * The caller owns the returned string.
 */
static CharPtr SimpleValuePos (CharPtr qval)

{
  CharPtr  first;
  CharPtr  last;

  first = StringStr (qval, "(pos:");
  if (first == NULL) return NULL;

  first += 5;
  while (*first == ' ') {
    first++;
  }

  last = StringStr (first, ",aa:");
  if (last == NULL) {
    last = first;
    while (*last != ',' && *last != '\0') {
      last++;
    }
  }

  return TextSave (first, last - first);
}
3476 
3477 //LCOV_EXCL_START
extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset);
/*
 * ParseAnticodon
 *
 * Parses the location part of an anticodon qualifier value (the text after
 * "(pos:", as extracted by SimpleValuePos) and stores the resulting SeqLoc
 * in the tRNA extension of an RNA feature.
 *
 *   sfp     feature to update; must have data.choice == SEQFEAT_RNA
 *   val     qualifier text; must contain non-blank text
 *   offset  value added to the parsed point, or to both ends of the
 *           parsed interval
 *
 * Returns TRUE when a location was parsed and accepted, FALSE otherwise.
 * For an interval the span (stop - start) must be exactly 2, the parser
 * must report no errors, and the interval must lie inside sfp->location.
 *
 * NOTE(review): trp->anticodon is assigned before validation and is not
 * cleared on the later FALSE returns, so a rejected location appears to
 * stay attached to the tRNA even though the message says "drop the
 * anticodon" - confirm callers expect this.
 * NOTE(review): any previous trp->anticodon is overwritten without being
 * freed - possible leak if this is called on a tRNA that already has one.
 */
extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)

{
  Int4       diff;
  Int2       j;
  Boolean    locmap;
  int        num_errs;
  CharPtr    pos;
  Boolean    pos_range = FALSE;
  RnaRefPtr  rrp;
  SeqIntPtr  sintp;
  SeqIdPtr   sip;
  Boolean    sitesmap;
  SeqLocPtr  slp;
  SeqPntPtr  spp;
  Uint1      strand;
  Int4       temp;
  tRNAPtr    trp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return FALSE;
  if (StringHasNoText (val)) return FALSE;

  rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rrp == NULL) return FALSE;

  /* no extension yet: create an empty tRNA (ext.choice 2) with all six codon slots set to 255 */
  if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) {
    rrp->ext.choice = 2;
    trp = (tRNAPtr) MemNew (sizeof (tRNA));
    rrp->ext.value.ptrvalue = (Pointer) trp;
    if (trp != NULL) {
      trp->aatype = 2;
      for (j = 0; j < 6; j++) {
        trp->codon [j] = 255;
      }
    }
  }
  /* only an extension with choice 2 (the tRNA created above) is accepted */
  if (rrp->ext.choice != 2) return FALSE;

  trp = (tRNAPtr) rrp->ext.value.ptrvalue;
  if (trp == NULL) return FALSE;

  /* find SeqId to use */
  sip = SeqLocId (sfp->location);
  if (sip == NULL) {
    /* fall back to the id of the first sub-location */
    slp = SeqLocFindNext (sfp->location, NULL);
    if (slp != NULL) {
      sip = SeqLocId (slp);
    }
  }
  if (sip == NULL) return FALSE;

  /* parse location */
  pos = SimpleValuePos (val);
  if (pos == NULL) {
    ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
               "anticodon parsing failed, %s, drop the anticodon", val);
    return FALSE;
  }

  /* pos is owned by this function from here on and is freed on every exit path below */
  trp->anticodon = Nlm_gbparseint (pos, &locmap, &sitesmap, &num_errs, sip);
  if (trp->anticodon == NULL) {
    ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
               "anticodon parsing failed, %s, drop the anticodon", pos);
    MemFree (pos);
    return FALSE;
  }

  if (trp->anticodon->choice == SEQLOC_PNT) {
    /* allow a single point */
    spp = trp->anticodon->data.ptrvalue;
    if (spp != NULL) {
      spp->point += offset;
    }
  }
  if (trp->anticodon->choice == SEQLOC_INT) {
    sintp = trp->anticodon->data.ptrvalue;
    if (sintp == NULL) {
      MemFree (pos);
      return FALSE;
    }
    sintp->from += offset;
    sintp->to += offset;
    /* normalize so that from <= to */
    if (sintp->from > sintp->to) {
      temp = sintp->from;
      sintp->from = sintp->to;
      sintp->to = temp;
    }
    /* the interval takes its strand from the feature location */
    sintp->strand = SeqLocStrand (sfp->location);
    strand = sintp->strand;
    diff = SeqLocStop(trp->anticodon) - SeqLocStart(trp->anticodon); /* SeqLocStop/Start does not do what you think */
    /*
    if ((diff != 2 && (strand != Seq_strand_minus)) ||
        (diff != -2 && (strand == Seq_strand_minus))) {
      pos_range = TRUE;
    }
    */
    /* an accepted interval spans exactly three positions (stop - start == 2) */
    if (diff != 2) {
      pos_range = TRUE;
    }
    if (num_errs > 0 || pos_range) {
      ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
                 "anticodon range is wrong, %s, drop the anticodon", pos);
      MemFree (pos);
      return FALSE;
    }
    /* the anticodon must lie within the tRNA feature location */
    if (SeqLocCompare (sfp->location, trp->anticodon) != SLC_B_IN_A) {
      ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
                 "/anticodon not in tRNA: %s", val);
      MemFree (pos);
      return FALSE;
    }
  }

  MemFree (pos);

  return TRUE;
}
3596 //LCOV_EXCL_STOP
3597 
3598 extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset);
ParseCodeBreak(SeqFeatPtr sfp,CharPtr val,Int4 offset)3599 extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
3600 
3601 {
3602   CodeBreakPtr  cbp;
3603   CdRegionPtr   crp;
3604   Int4          diff;
3605   CodeBreakPtr  lastcbp;
3606   Boolean       locmap;
3607   int           num_errs;
3608   Boolean       packed_int = TRUE;
3609   CharPtr       pos;
3610   Boolean       pos_range = FALSE;
3611   SeqIntPtr     sintp;
3612   SeqIdPtr      sip;
3613   Boolean       sitesmap;
3614   SeqLocPtr     slp;
3615   SeqLocPtr     slp1, slp2;
3616   SeqPntPtr     spp;
3617   Uint1         strand;
3618   Int4          temp;
3619   CharPtr       tmp;
3620 
3621   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;
3622   if (StringHasNoText (val)) return FALSE;
3623   crp = (CdRegionPtr) sfp->data.value.ptrvalue;
3624   if (crp == NULL) return FALSE;
3625 
3626   /* find SeqId to use */
3627   sip = SeqLocId (sfp->location);
3628   if (sip == NULL) {
3629     slp = SeqLocFindNext (sfp->location, NULL);
3630     if (slp != NULL) {
3631       sip = SeqLocId (slp);
3632     }
3633   }
3634   if (sip == NULL) return FALSE;
3635 
3636   cbp = CodeBreakNew ();
3637   if (cbp == NULL) return FALSE;
3638   cbp->aa.choice = 1; /* ncbieaa */
3639   cbp->aa.value.intvalue = (Int4) GetQualValueAa (val);
3640 
3641   /* parse location */
3642   pos = SimpleValuePos (val);
3643   if (pos == NULL) {
3644     ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3645                "transl_except parsing failed, %s, drop the transl_except", val);
3646     return FALSE;
3647   }
3648   if (StringChr (pos, ',') != NULL) {
3649     tmp = (CharPtr) MemNew ((StringLen (pos) + 10) * sizeof (Char));
3650     if (tmp != NULL) {
3651       sprintf (tmp, "join(%s)", pos);
3652       MemFree (pos);
3653       pos = tmp;
3654     }
3655   }
3656   cbp->loc = Nlm_gbparseint (pos, &locmap, &sitesmap, &num_errs, sip);
3657   if (cbp->loc == NULL) {
3658     CodeBreakFree (cbp);
3659     ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3660                "transl_except parsing failed, %s, drop the transl_except", pos);
3661     MemFree (pos);
3662     return FALSE;
3663   }
3664   if (cbp->loc->choice == SEQLOC_PNT) {
3665     /* allow a single point */
3666     spp = cbp->loc->data.ptrvalue;
3667     if (spp != NULL) {
3668       spp->point += offset;
3669     }
3670   } else if (cbp->loc->choice == SEQLOC_INT) {
3671     sintp = cbp->loc->data.ptrvalue;
3672     if (sintp == NULL) {
3673       MemFree (pos);
3674       return FALSE;
3675     }
3676     sintp->from += offset;
3677     sintp->to += offset;
3678     if (sintp->from > sintp->to) {
3679       temp = sintp->from;
3680       sintp->from = sintp->to;
3681       sintp->to = temp;
3682     }
3683     sintp->strand = SeqLocStrand (sfp->location);
3684     strand = sintp->strand;
3685     diff = SeqLocStop(cbp->loc) - SeqLocStart(cbp->loc); /* SeqLocStop/Start does not do what you think */
3686     /*
3687     if ((diff != 2 && (strand != Seq_strand_minus)) ||
3688         (diff != -2 && (strand == Seq_strand_minus))) {
3689       pos_range = TRUE;
3690     }
3691     */
3692     if (diff != 2) {
3693       pos_range = TRUE;
3694     }
3695     if (num_errs > 0 || pos_range) {
3696       CodeBreakFree (cbp);
3697       ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3698                  "transl_except range is wrong, %s, drop the transl_except", pos);
3699       MemFree (pos);
3700       return FALSE;
3701     }
3702     if (SeqLocCompare (sfp->location, cbp->loc) != SLC_B_IN_A) {
3703       CodeBreakFree (cbp);
3704       ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3705                  "/transl_except not in CDS: %s", val);
3706       MemFree (pos);
3707       return FALSE;
3708     }
3709   } else {
3710     slp1 = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
3711     if (slp1 != NULL) {
3712       slp2 = aaLoc_to_dnaLoc (sfp, slp1);
3713       if (slp2 != NULL) {
3714         SeqLocFree (cbp->loc);
3715         cbp->loc = slp2;
3716       }
3717       SeqLocFree (slp1);
3718     }
3719     slp = SeqLocFindNext (cbp->loc, NULL);
3720     while (slp != NULL) {
3721       if (slp->choice == SEQLOC_PNT) {
3722         spp = slp->data.ptrvalue;
3723         if (spp != NULL) {
3724           sintp = SeqIntNew();
3725           if (sintp != NULL) {
3726             sintp->id = SeqIdDup (spp->id);
3727             sintp->from = spp->point;
3728             sintp->to = spp->point;
3729             sintp->strand = SeqLocStrand (sfp->location);
3730             slp->choice = SEQLOC_INT;
3731             slp->data.ptrvalue = sintp;
3732             SeqPntFree (spp);
3733           }
3734         }
3735       }
3736       if (slp->choice == SEQLOC_INT) {
3737         sintp = slp->data.ptrvalue;
3738         if (sintp == NULL) {
3739           MemFree (pos);
3740           return FALSE;
3741         }
3742         sintp->from += offset;
3743         sintp->to += offset;
3744         if (sintp->from > sintp->to) {
3745           temp = sintp->from;
3746           sintp->from = sintp->to;
3747           sintp->to = temp;
3748         }
3749         sintp->strand = SeqLocStrand (sfp->location);
3750       } else {
3751         packed_int = FALSE;
3752       }
3753       slp = SeqLocFindNext (cbp->loc, slp);
3754     }
3755     slp = cbp->loc;
3756     if (packed_int && slp->choice == SEQLOC_MIX) {
3757       slp->choice = SEQLOC_PACKED_INT;
3758     }
3759   }
3760 
3761   /* add to code break list */
3762   lastcbp = crp->code_break;
3763   if (lastcbp == NULL) {
3764     crp->code_break = cbp;
3765   } else {
3766      while (lastcbp->next != NULL) {
3767       lastcbp = lastcbp->next;
3768     }
3769     lastcbp->next = cbp;
3770   }
3771   MemFree (pos);
3772   return TRUE;
3773 }
3774 
CodonsAlreadyInOrder(tRNAPtr trp)3775 static Boolean CodonsAlreadyInOrder (tRNAPtr trp)
3776 
3777 {
3778   Int2  i, j;
3779 
3780   if (trp == NULL) return TRUE;
3781   for (i = 0, j = 1; i < 5; i++, j++) {
3782     if (trp->codon [i] > trp->codon [j]) return FALSE;
3783   }
3784   return TRUE;
3785 }
3786 
SortCodons(VoidPtr ptr1,VoidPtr ptr2)3787 static int LIBCALLBACK SortCodons (VoidPtr ptr1, VoidPtr ptr2)
3788 
3789 {
3790   Uint1  codon1, codon2;
3791 
3792   if (ptr1 == NULL || ptr2 == NULL) return 0;
3793   codon1 = *((Uint1Ptr) ptr1);
3794   codon2 = *((Uint1Ptr) ptr2);
3795   if (codon1 > codon2) {
3796     return 1;
3797   } else if (codon1 < codon2) {
3798     return -1;
3799   }
3800   return 0;
3801 }
3802 
/*
 * UniqueCodons
 *
 * Collapses runs of equal adjacent values in the six-entry trp->codon
 * array, packing the survivors at the front and filling the remaining
 * slots with 255.  Because the comparison starts from 255, leading 255
 * entries are dropped as well.  A NULL trp is ignored.
 */
static void UniqueCodons (tRNAPtr trp)

{
  Uint1  prev = 255;
  Int2   rd;
  Int2   wr = 0;

  if (trp == NULL) return;

  for (rd = 0; rd < 6; rd++) {
    if (trp->codon [rd] == prev) continue;
    prev = trp->codon [rd];
    trp->codon [wr] = prev;
    wr++;
  }
  for (; wr < 6; wr++) {
    trp->codon [wr] = 255;
  }
}
3824 
/*
 * Expansion table for a nucleotide bit mask 0..15: entry i lists the
 * bases whose bit is set in i (A = 1, C = 2, G = 4, T = 8).  Entry 0 is
 * "?" and the list ends with a NULL sentinel.  ParseDegenerateCodon
 * indexes this table with the code of the third codon letter.
 */
static CharPtr  codonLetterExpand [] =
{
  "?", "A", "C", "AC",
  "G", "AG", "CG", "ACG",
  "T", "AT", "CT", "ACT",
  "GT", "AGT", "CGT", "ACGT",
  NULL
};
3833 
ParseDegenerateCodon(tRNAPtr trp,Uint1Ptr codon)3834 NLM_EXTERN Boolean ParseDegenerateCodon (tRNAPtr trp, Uint1Ptr codon)
3835 
3836 {
3837   Uint1    ch;
3838   Uint1    chrToInt [256];
3839   Int2     k;
3840   Uint1    i, j;
3841   Uint1    idx;
3842   CharPtr  intToChr = "?ACMGRSVTWYHKDBN";
3843   CharPtr  ptr, str;
3844 
3845   if (trp == NULL || codon == NULL) return FALSE;
3846 
3847   for (i = 0; i < 2; i++) {
3848     ch = codon [i];
3849     if (ch != 'A' && ch != 'C' && ch != 'G' && ch != 'T') return FALSE;
3850   }
3851 
3852   for (k = 0; k < 256; k++) {
3853     chrToInt [k] = 0;
3854   }
3855   for (i = 1; i < 16; i++) {
3856     ch = intToChr [i];
3857     chrToInt [(int) ch] = i;
3858   }
3859 
3860   idx = chrToInt [(int) codon [2]];
3861   if (idx > 15) return FALSE;
3862 
3863   str = codonLetterExpand [idx];
3864   ptr = str;
3865   ch = *ptr;
3866   j = 0;
3867   codon [3] = '\0';
3868   while (ch != '\0' && j < 6) {
3869     codon [2] = ch;
3870     trp->codon [j] = IndexForCodon (codon, Seq_code_iupacna);
3871     ptr++;
3872     ch = *ptr;
3873     j++;
3874   }
3875 
3876   return TRUE;
3877 }
3878 
/*
 * CleanupTrna
 *
 * Normalizes a tRNA extension and, where possible, absorbs information held
 * in the feature comment:
 *
 *   1. sorts trp->codon and removes duplicates;
 *   2. relabels aatype 1 as aatype 2 without touching trp->aa;
 *   3. runs ParseTRnaString on sfp->comment.  If the tRNA has no amino acid
 *      yet, or its amino acid is 'X', the parsed one is adopted.  When the
 *      resulting amino acid agrees with the comment and the parser reports
 *      that the comment was nothing but tRNA text, empty codon slots (255)
 *      are filled from the parsed codons and the comment is freed, unless
 *      it is exactly "fMet" or "iMet".
 *
 * sfp may be NULL, in which case only steps 1 and 2 happen.
 *
 * A disabled legacy step used to sit before step 1: it read a
 * "codon recognized: " / "codons recognized: " comment, converted U to T,
 * and loaded the codon through ParseDegenerateCodon.
 */
static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)

{
  Uint1           aa = 0;
  Boolean         justTrnaText;
  Uint1           parsedAa;
  Uint1           parsedCodons [6];
  Int2            slot;
  Uint1           srcCode = 0;
  SeqMapTablePtr  table;

  if (trp == NULL) return;

  if (! CodonsAlreadyInOrder (trp)) {
    StableMergeSort ((VoidPtr) &(trp->codon), 6, sizeof (Uint1), SortCodons);
  }
  UniqueCodons (trp);

  /* now always switch iupacaa to ncbieaa (was just for selenocysteine) */

  if (trp->aatype == 1) {
    trp->aatype = 2;
  }

  if (sfp == NULL || sfp->comment == NULL) return;

  /* obtain the current amino acid as an aatype-2 letter */

  if (trp->aatype == 2) {
    aa = trp->aa;
  } else {
    if (trp->aatype == 1) {
      srcCode = Seq_code_iupacaa;
    } else if (trp->aatype == 2) {
      srcCode = Seq_code_ncbieaa;
    } else if (trp->aatype == 3) {
      srcCode = Seq_code_ncbi8aa;
    } else if (trp->aatype == 4) {
      srcCode = Seq_code_ncbistdaa;
    }
    table = SeqMapTableFind (Seq_code_ncbieaa, srcCode);
    if (table != NULL) {
      aa = SeqMapTableConvert (table, trp->aa);
    }
  }

  /* look for tRNA-OTHER with actual amino acid in comment */

  parsedAa = ParseTRnaString (sfp->comment, &justTrnaText, parsedCodons, TRUE);

  if (aa == 'X') {
    /* unknown amino acid: take whatever the comment names, if anything */
    if (parsedAa == 0) return;
    trp->aa = parsedAa;
    trp->aatype = 2;
  } else {
    if (aa == 0 && parsedAa != 0) {
      aa = parsedAa;
      trp->aa = parsedAa;
      trp->aatype = 2;
    }
    /* comment must agree with the tRNA before it may be absorbed */
    if (aa == 0 || aa != parsedAa) return;
  }

  if (! justTrnaText) return;

  for (slot = 0; slot < 6; slot++) {
    if (trp->codon [slot] == 255) {
      trp->codon [slot] = parsedCodons [slot];
    }
  }
  if (StringCmp (sfp->comment, "fMet") != 0 && StringCmp (sfp->comment, "iMet") != 0) {
    sfp->comment = MemFree (sfp->comment);
  }
}
4013 
GetBestProteinFeatureUnindexed(SeqLocPtr product)4014 NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product)
4015 
4016 {
4017   BioseqPtr    bsp;
4018   SeqFeatPtr   prot = NULL;
4019   SeqAnnotPtr  sap;
4020   SeqFeatPtr   tmp;
4021   ValNode      vn;
4022 
4023   if (product == NULL) return NULL;
4024   bsp = BioseqFindFromSeqLoc (product);
4025   if (bsp == NULL || bsp->repr != Seq_repr_raw) return NULL;
4026   vn.choice = SEQLOC_WHOLE;
4027   vn.data.ptrvalue = (Pointer) SeqIdFindBest (bsp->id, 0);
4028   vn.next = NULL;
4029   for (sap = bsp->annot; sap != NULL && prot == NULL; sap = sap->next) {
4030     if (sap->type == 1) {
4031       for (tmp = (SeqFeatPtr) sap->data; tmp != NULL && prot == NULL; tmp = tmp->next) {
4032         if (tmp->data.choice == SEQFEAT_PROT) {
4033           if (SeqLocCompare (tmp->location, &vn)) {
4034             /* find first protein feature packaged on and located on bioseq */
4035             prot = tmp;
4036           }
4037         }
4038       }
4039     }
4040   }
4041   return prot;
4042 }
4043 
/*
 * CleanupECNumber
 *
 * Tidies an EC number string in place: a trailing '.' is blanked, a leading
 * "EC " or "EC:" prefix (case-insensitive) is blanked, and the result is
 * passed through TrimSpacesAroundString.  Empty input is left alone.
 */
static void CleanupECNumber (CharPtr str)

{
  Int2    blanks = 0;
  Int2    k;
  size_t  n;

  n = StringLen (str);
  if (n < 1) return;

  if (str [n - 1] == '.') {
    str [n - 1] = ' ';
  }

  /* "EC " already ends in a space, so only two characters need blanking */
  if (StringNICmp (str, "EC ", 3) == 0) {
    blanks = 2;
  } else if (StringNICmp (str, "EC:", 3) == 0) {
    blanks = 3;
  }
  for (k = 0; k < blanks; k++) {
    str [k] = ' ';
  }

  TrimSpacesAroundString (str);
}
4064 
ECNumberCanBeSplit(CharPtr str)4065 static Boolean ECNumberCanBeSplit (CharPtr str)
4066 
4067 {
4068   Char     ch;
4069   CharPtr  ptr;
4070 
4071   if (StringHasNoText (str)) return FALSE;
4072 
4073   ptr = str;
4074   ch = *ptr;
4075   while (ch != '\0') {
4076     if ((! IS_DIGIT (ch)) && ch != '.' && ch !='-' && ch !='n' && ch != ' ' && ch !=';') return FALSE;
4077     ptr++;
4078     ch = *ptr;
4079   }
4080 
4081   return TRUE;
4082 }
4083 
HandledGBQualOnCDS(SeqFeatPtr sfp,GBQualPtr gbq,ValNodePtr PNTR afterMe)4084 static Boolean HandledGBQualOnCDS (SeqFeatPtr sfp, GBQualPtr gbq, ValNodePtr PNTR afterMe)
4085 
4086 {
4087   Int2            choice = 0;
4088   CdRegionPtr     crp;
4089   Uint1           frame;
4090   ValNodePtr      gcp;
4091   ValNodePtr      prev;
4092   SeqFeatPtr      prot;
4093   ProtRefPtr      prp = NULL;
4094   Char            str [16];
4095   Int4            transl_table;
4096   int             val;
4097   ValNodePtr      vnp;
4098   SeqFeatXrefPtr  xref;
4099 
4100   if (StringICmp (gbq->qual, "product") == 0) {
4101     choice = 1;
4102   } else if (StringICmp (gbq->qual, "function") == 0) {
4103     choice = 2;
4104   } else if (StringICmp (gbq->qual, "EC_number") == 0) {
4105     choice = 3;
4106   } else if (StringICmp (gbq->qual, "prot_note") == 0) {
4107     choice = 4;
4108   }
4109   if (choice > 0) {
4110     prot = GetBestProteinFeatureUnindexed (sfp->product);
4111     if (prot != NULL) {
4112       prp = (ProtRefPtr) prot->data.value.ptrvalue;
4113     }
4114     if (prp == NULL) {
4115       /* otherwise make cross reference */
4116       xref = sfp->xref;
4117       while (xref != NULL && xref->data.choice != SEQFEAT_PROT) {
4118         xref = xref->next;
4119       }
4120       if (xref == NULL) {
4121         prp = ProtRefNew ();
4122         if (prp == NULL) return FALSE;
4123         xref = SeqFeatXrefNew ();
4124         if (xref == NULL) return FALSE;
4125         xref->data.choice = SEQFEAT_PROT;
4126         xref->data.value.ptrvalue = (Pointer) prp;
4127         xref->next = sfp->xref;
4128         sfp->xref = xref;
4129       }
4130       if (xref != NULL) {
4131         prp = (ProtRefPtr) xref->data.value.ptrvalue;
4132       }
4133     }
4134     if (prp == NULL) return FALSE;
4135     switch (choice) {
4136       case 1 :
4137         if (prot != NULL && prot->data.value.ptrvalue != NULL) {
4138           if (*afterMe == NULL) {
4139             /* if protein product exists, product gbqual becomes first name */
4140             vnp = ValNodeCopyStr (NULL, 0, gbq->val);
4141             if (vnp != NULL) {
4142               vnp->next = prp->name;
4143               prp->name = vnp;
4144             }
4145             *afterMe = vnp;
4146           } else {
4147             vnp = ValNodeCopyStr (NULL, 0, gbq->val);
4148             prev = *afterMe;
4149             if (vnp != NULL) {
4150               vnp->next = prev->next;
4151               prev->next = vnp;
4152             }
4153             *afterMe = vnp;
4154           }
4155         } else {
4156           /* if local xref, append to name */
4157           ValNodeCopyStr (&(prp->name), 0, gbq->val);
4158         }
4159         break;
4160       case 2 :
4161         ValNodeCopyStr (&(prp->activity), 0, gbq->val);
4162         break;
4163       case 3 :
4164         ValNodeCopyStr (&(prp->ec), 0, gbq->val);
4165         break;
4166       case 4 :
4167         if (prot == NULL) {
4168           return FALSE;
4169         } else {
4170           prot->comment = StringSave (gbq->val);
4171         }
4172         break;
4173       default :
4174         break;
4175     }
4176     return TRUE;
4177   }
4178 
4179   if (StringICmp (gbq->qual, "transl_except") == 0) {
4180     return ParseCodeBreak (sfp, gbq->val, 0);
4181   }
4182 
4183   if (StringICmp (gbq->qual, "codon_start") == 0) {
4184     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
4185     if (crp != NULL) {
4186       frame = crp->frame;
4187       if (frame == 0) {
4188         StringNCpy_0 (str, gbq->val, sizeof (str));
4189         if (sscanf (str, "%d", &val) == 1) {
4190           if (val > 0 && val < 4) {
4191             crp->frame = (Uint1) val;
4192             return TRUE;
4193           }
4194         }
4195         frame = 1;
4196       }
4197       sprintf (str, "%d", (int) frame);
4198       if (StringICmp (str, gbq->val) == 0) {
4199         return TRUE;
4200       } else if (sfp->pseudo && sfp->product == NULL) {
4201         StringNCpy_0 (str, gbq->val, sizeof (str));
4202         if (sscanf (str, "%d", &val) == 1) {
4203           if (val > 0 && val < 4) {
4204             crp->frame = (Uint1) val;
4205             return TRUE;
4206           }
4207         }
4208       }
4209     }
4210   }
4211 
4212   if (StringICmp (gbq->qual, "transl_table") == 0) {
4213     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
4214     if (crp != NULL) {
4215       transl_table = 0;
4216       gcp = crp->genetic_code;
4217       if (gcp != NULL) {
4218         for (vnp = gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
4219           if (vnp->choice == 2 && vnp->data.intvalue != 0) {
4220             transl_table = vnp->data.intvalue;
4221           }
4222         }
4223         if (transl_table == 0) {
4224           transl_table = 1;
4225         }
4226         sprintf (str, "%ld", (long) transl_table);
4227         if (StringICmp (str, gbq->val) == 0) {
4228           return TRUE;
4229         }
4230       } else {
4231         StringNCpy_0 (str, gbq->val, sizeof (str));
4232         if (sscanf (str, "%d", &val) == 1) {
4233           vnp = ValNodeNew (NULL);
4234           if (vnp != NULL) {
4235             vnp->choice = 2;
4236             vnp->data.intvalue = (Int4) val;
4237             gcp = GeneticCodeNew ();
4238             if (gcp != NULL) {
4239               gcp->data.ptrvalue = vnp;
4240               crp->genetic_code = gcp;
4241               return TRUE;
4242             }
4243           }
4244         }
4245       }
4246     }
4247   }
4248 
4249   if (StringICmp (gbq->qual, "translation") == 0) {
4250     return TRUE;
4251   }
4252 
4253   return FALSE;
4254 }
4255 
4256 
/*
 * HandledGBQualOnRNA
 *
 * Tries to fold one GenBank qualifier of an RNA feature into the RnaRef.
 * Returns TRUE when the qualifier has been absorbed (or is redundant),
 * FALSE when it was not handled.
 *
 *   sfp           the RNA feature; sfp->data.value.ptrvalue is read as an
 *                 RnaRefPtr.  sfp->comment may be created, extended or freed.
 *   gbq           the qualifier (gbq->qual / gbq->val).  gbq->val may be
 *                 modified in place or have its ownership moved to
 *                 sfp->comment (gbq->val is then set to NULL).
 *   isEmblOrDdbj  when TRUE, "standard_name" is not treated like "product".
 *
 * Two qualifier families are processed:
 *
 *   product (and standard_name unless isEmblOrDdbj)
 *       rrp->ext.choice selects how the extension is interpreted here:
 *       0 = nothing, 1 = name string, 2 = tRNA structure, 3 = RNAGen
 *       structure (as shown by the casts below).  rrp->type 3 is the type
 *       for which tRNA structures are built.
 *
 *   anticodon
 *       Parses text of the form "(pos:FROM..TO,aa:XXX)" into
 *       trp->anticodon, and fills in the amino acid if the tRNA has none.
 */
static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmblOrDdbj)

{
  Uint1      aa;
  BioseqPtr  bsp;
  Uint1      codon [6];
  Boolean    emptyRNA;
  Int4       from;
  Boolean    is_fMet = FALSE;
  Boolean    is_iMet = FALSE;
  Boolean    is_std_name = FALSE;
  Int2       j;
  Boolean    justTrnaText;
  size_t     len;
  CharPtr    name;
  CharPtr    ptr;
  RNAGenPtr  rgp;
  RnaRefPtr  rrp;
  SeqIntPtr  sintp;
  SeqIdPtr   sip;
  CharPtr    str;
  Char       tmp [64];
  Int4       to;
  tRNAPtr    trp;
  long int   val;

  is_std_name = (Boolean) (StringICmp (gbq->qual, "standard_name") == 0);
  if (StringICmp (gbq->qual, "product") == 0 ||
      (is_std_name && (! isEmblOrDdbj) )) {
    rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
    if (rrp == NULL) return FALSE;
    /* an unset type (0) is promoted to 255 */
    if (rrp->type == 0) {
      rrp->type = 255;
    }
    /* standard_name is never absorbed on a type-255 RNA */
    if (rrp->type == 255 && is_std_name) return FALSE;
    /* drop a name extension that holds no text */
    if (rrp->ext.choice == 1) {
      name = (CharPtr) rrp->ext.value.ptrvalue;
      if (StringHasNoText (name)) {
        rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
        rrp->ext.choice = 0;
      }
    }
    /* drop a tRNA extension that carries no amino acid, anticodon or codon */
    if (rrp->ext.choice == 2) {
      trp = (tRNAPtr) rrp->ext.value.ptrvalue;
      if (trp != NULL) {
        if (trp->aatype == 0 && trp->aa == 0 && trp->anticodon == NULL) {
          emptyRNA = TRUE;
          for (j = 0; j < 6; j++) {
            if (trp->codon [j] != 255) {
              emptyRNA = FALSE;
            }
          }
          if (emptyRNA) {
            rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
            rrp->ext.choice = 0;
          }
        }
      }
    }
    /* type 3 with a plain name: try to convert the name into a tRNA structure */
    if (rrp->type == 3 && rrp->ext.choice == 1) {
      name = (CharPtr) rrp->ext.value.ptrvalue;
      aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
      if (aa != 0) {
        /* remember fMet/iMet before the name string is freed */
        is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
        is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
        rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
        trp = (tRNAPtr) MemNew (sizeof (tRNA));
        if (trp != NULL) {
          trp->aatype = 2;
          for (j = 0; j < 6; j++) {
            trp->codon [j] = 255;
          }
          /* codons parsed from the name are only used when the name was pure tRNA text */
          if (justTrnaText) {
            for (j = 0; j < 6; j++) {
              trp->codon [j] = codon [j];
            }
          }
          trp->aa = aa;
          rrp->ext.choice = 2;
          rrp->ext.value.ptrvalue = (Pointer) trp;
          if (aa == 'M') {
            /* record fMet / iMet in the comment, appending with "; " if one exists */
            if (is_fMet) {
              if (sfp->comment == NULL) {
                sfp->comment = StringSave ("fMet");
              } else {
                len = StringLen (sfp->comment) + StringLen ("fMet") + 5;
                str = MemNew (sizeof (Char) * len);
                StringCpy (str, sfp->comment);
                StringCat (str, "; ");
                StringCat (str, "fMet");
                sfp->comment = MemFree (sfp->comment);
                sfp->comment = str;
              }
            }
            if (is_iMet) {
              if (sfp->comment == NULL) {
                sfp->comment = StringSave ("iMet");
              } else {
                len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
                str = MemNew (sizeof (Char) * len);
                StringCpy (str, sfp->comment);
                StringCat (str, "; ");
                StringCat (str, "iMet");
                sfp->comment = MemFree (sfp->comment);
                sfp->comment = str;
              }
            }
          }
          CleanupTrna (sfp, trp);
        }
      }
    }
    /* type 3 without any extension: keep the product as a feature qualifier */
    if (rrp->type == 3 && rrp->ext.choice == 0) {
      AddQualifierToFeature (sfp, "product", gbq->val);
      return TRUE;
    }
    /* type 3 with a tRNA structure: the qualifier is redundant if it names the same amino acid */
    if (rrp->type == 3 && rrp->ext.choice == 2) {
      trp = (tRNAPtr) rrp->ext.value.ptrvalue;
      if (trp != NULL && trp->aatype == 2) {
        /* 77 is 'M'; fMet / iMet products are deliberately kept */
        if (trp->aa == 77) {
          if (StringICmp (gbq->val, "tRNA-fMet") == 0 || StringICmp (gbq->val, "tRNA-iMet") == 0) return FALSE;
        }
        if (trp->aa == ParseTRnaString (gbq->val, NULL, NULL, FALSE)) {
          return TRUE;
        }
      }
    }
    /* RNAGen extension: fill in the product only when it is still empty */
    if (rrp->ext.choice == 3) {
      rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
      if (rgp == NULL) return FALSE;
      if (StringHasNoText (rgp->product)) {
        rgp->product = StringSave (gbq->val);
        return TRUE;
      }
      return FALSE;
    }
    /* from here on only "no extension" or "name" extensions are handled */
    if (rrp->ext.choice != 0 && rrp->ext.choice != 1) return FALSE;
    name = (CharPtr) rrp->ext.value.ptrvalue;
    if (! HasNoText (name)) {
      if (StringICmp (name, gbq->val) == 0) {
        return TRUE;
      }
      /* retry the comparison with the first "rDNA" in the value rewritten to "rRNA";
         the rewrite of gbq->val is in place and is kept even if the retry fails */
      str = StringStr (gbq->val, "rDNA");
      if (str != NULL) {
        str [1] = 'R';
        if (StringICmp (name, gbq->val) == 0) {
          return TRUE;
        }
      }
      if (rrp->type == 255 || rrp->type == 8 || rrp->type == 9 || rrp->type == 10) {
        /* new convention follows ASN.1 spec comments, allows new RNA types */
        return FALSE;
      }
      /* subsequent /product now added to comment */
      if (sfp->comment == NULL) {
        /* ownership of the value string moves to the feature comment */
        sfp->comment = gbq->val;
        gbq->val = NULL;
      } else if (StringStr (gbq->val, sfp->comment) == NULL) {
        /* NOTE(review): this appends only when the existing comment is not a
           substring of the value; when it is, the value is dropped and TRUE is
           still returned.  Confirm the argument order is intended. */
        len = StringLen (sfp->comment) + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, sfp->comment);
        StringCat (str, "; ");
        StringCat (str, gbq->val);
        sfp->comment = MemFree (sfp->comment);
        sfp->comment = str;
      }
      /* return FALSE; */
      return TRUE;
    }
    if (rrp->type == 8 || rrp->type == 9 || rrp->type == 10) {
      /* new convention follows ASN.1 spec comments, allows new RNA types */
      return FALSE;
    }
    /* no usable name so far: the qualifier value becomes the RNA name */
    if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
      rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
    }
    if (rrp->ext.choice == 0 || rrp->ext.choice == 1) {
      rrp->ext.choice = 1;
      rrp->ext.value.ptrvalue = StringSave (gbq->val);
      return TRUE;
    }
  } else if (StringICmp (gbq->qual, "anticodon") == 0) {
    rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
    if (rrp == NULL) return FALSE;
    if (rrp->type == 0) {
      rrp->type = 255;
    }
    /* type 3 without extension: create an empty tRNA structure to receive the anticodon */
    if (rrp->type == 3 && rrp->ext.choice == 0) {
      trp = (tRNAPtr) MemNew (sizeof (tRNA));
      if (trp != NULL) {
        rrp->ext.choice = 2;
        rrp->ext.value.ptrvalue = trp;
        for (j = 0; j < 6; j++) {
          trp->codon [j] = 255;
        }
      }
    }
    if (rrp->type == 3 && rrp->ext.choice == 2) {
      trp = (tRNAPtr) rrp->ext.value.ptrvalue;
      if (trp != NULL) {
        /* work on a truncated local copy; it is cut up with '\0' while parsing */
        StringNCpy_0 (tmp, gbq->val, sizeof (tmp));
        /* locate the ':' that follows "pos" after the opening parenthesis */
        ptr = StringStr (tmp, "(");
        if (ptr != NULL) {
          ptr = StringStr (ptr + 1, "pos");
          if (ptr != NULL) {
            ptr = StringStr (ptr + 3, ":");
          }
        }
        if (ptr != NULL) {
          str = ptr + 1;
          ptr = StringStr (str, "..");
          if (ptr != NULL) {
            *ptr = '\0';
            if (sscanf (str, "%ld", &val) == 1) {
              /* qualifier positions are 1-based, stored positions 0-based */
              from = val - 1;
              str = ptr + 2;
              ptr = StringStr (str, ",");
              if (ptr != NULL) {
                *ptr = '\0';
                if (sscanf (str, "%ld", &val) == 1) {
                  to = val - 1;
                  sip = SeqLocId (sfp->location);
                  if (sip != NULL) {
                    bsp = BioseqFind (sip);
                    if (bsp != NULL) {
                      /* NOTE(review): both bounds are tested against length - 1 with '<',
                         so a position on the last residue is rejected; confirm intended. */
                      if (from >= 0 && from < bsp->length - 1) {
                        if (to >= 0 && to < bsp->length - 1) {
                          sintp = SeqIntNew ();
                          if (sintp != NULL) {
                            /* reversed coordinates are stored as a minus-strand interval */
                            if (from > to) {
                              sintp->from = to;
                              sintp->to = from;
                              sintp->strand = Seq_strand_minus;
                            } else {
                              sintp->from = from;
                              sintp->to = to;
                              sintp->strand = Seq_strand_plus;
                            }
                            sintp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
                            /* NOTE(review): any anticodon already on trp is overwritten
                               here without being freed. */
                            trp->anticodon = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp);
                            /* take the amino acid from "aa:...)" only if the tRNA has none */
                            if (trp->aatype == 0 && trp->aa == 0) {
                              ptr = StringStr (ptr + 1, "aa:");
                              if (ptr != NULL) {
                                str = ptr + 3;
                                ptr = StringStr (str, ")");
                                if (ptr != NULL) {
                                  *ptr = '\0';
                                  trp->aa = ParseTRnaString (str, NULL, NULL, FALSE);
                                  if (trp->aa != 0) {
                                    trp->aatype = 2;
                                  }
                                }
                              }
                            }
                            return TRUE;
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  return FALSE;
}
4527 
HandledGBQualOnProt(SeqFeatPtr sfp,GBQualPtr gbq)4528 static Boolean HandledGBQualOnProt (SeqFeatPtr sfp, GBQualPtr gbq)
4529 
4530 {
4531   Int2        choice = 0;
4532   ProtRefPtr  prp;
4533   ValNodePtr  vnp;
4534 
4535   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
4536   if (prp == NULL) return FALSE;
4537   if (StringICmp (gbq->qual, "product") == 0) {
4538     choice = 1;
4539   } else if (StringICmp (gbq->qual, "function") == 0) {
4540     choice = 2;
4541   } else if (StringICmp (gbq->qual, "EC_number") == 0) {
4542     choice = 3;
4543   } else if (StringICmp (gbq->qual, "standard_name") == 0) {
4544     choice = 4;
4545   } else if (StringICmp (gbq->qual, "label") == 0) {
4546     choice = 5;
4547   } else if (StringICmp (gbq->qual, "allele") == 0) {
4548       choice = 6;
4549   }
4550   if (choice == 1 || choice == 4) {
4551     vnp = prp->name;
4552     if (vnp != NULL && (! HasNoText (vnp->data.ptrvalue))) return FALSE;
4553     ValNodeCopyStr (&(prp->name), 0, gbq->val);
4554     /*
4555     vnp = prp->name;
4556     if (vnp != NULL && prp->desc != NULL) {
4557       if (StringICmp (vnp->data.ptrvalue, prp->desc) == 0) {
4558         prp->desc = MemFree (prp->desc);
4559       }
4560     }
4561     */
4562     return TRUE;
4563   } else if (choice == 2) {
4564     ValNodeCopyStr (&(prp->activity), 0, gbq->val);
4565     return TRUE;
4566   } else if (choice == 3) {
4567     ValNodeCopyStr (&(prp->ec), 0, gbq->val);
4568     return TRUE;
4569   } else if (choice == 5) {
4570     return FALSE; /* keep label gbqual only */
4571   } else if (choice == 6) {
4572       return FALSE;
4573   }
4574 
4575   if (StringICmp (gbq->qual, "experiment") == 0 ||
4576       StringICmp (gbq->qual, "inference") == 0) {
4577     return FALSE;
4578   }
4579 
4580   if (StringICmp (gbq->qual, "UniProtKB_evidence") == 0) {
4581     return FALSE;
4582   }
4583 
4584   return TRUE; /* all other gbquals not appropriate on protein features */
4585 }
4586 
HandledGBQualOnImp(SeqFeatPtr sfp,GBQualPtr gbq)4587 static Boolean HandledGBQualOnImp (SeqFeatPtr sfp, GBQualPtr gbq)
4588 
4589 {
4590   Char        ch;
4591   ImpFeatPtr  ifp;
4592   Int4        len;
4593   CharPtr     ptr;
4594 
4595   if (StringICmp (gbq->qual, "rpt_unit") == 0) {
4596     if (HasNoText (gbq->val)) return FALSE;
4597     ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4598     if (ifp == NULL) return FALSE;
4599     if (StringICmp (ifp->key, "repeat_region") != 0) return FALSE;
4600     len = SeqLocLen (sfp->location);
4601     if (len != (Int4) StringLen (gbq->val)) return FALSE;
4602     ptr = gbq->val;
4603     ch = *ptr;
4604     while (ch != '\0') {
4605       if (StringChr ("ACGTNacgtn", ch) == NULL) return FALSE;
4606       ptr++;
4607       ch = *ptr;
4608     }
4609     /* return TRUE; */
4610   }
4611   return FALSE;
4612 }
4613 
/*
 * CleanupRptUnit
 *
 * Normalizes gbq->val.
 *
 * If the value is built only from digits, whitespace, '(' ')' ',' and
 * range separators, it is replaced by a compacted copy: whitespace is
 * removed and every run of '.' / '-' characters becomes "..".
 *
 * If any other character is met, the value is kept and merely lower-cased
 * in place.  The same happens when a '-' shows up where a new group is
 * expected (a second '-' run within one group, e.g. "1-2-3").
 */
static void CleanupRptUnit (GBQualPtr gbq)

{
  CharPtr  buf;
  Char     c;
  CharPtr  in;
  Boolean  numeric = TRUE;
  CharPtr  out;
  CharPtr  p;

  if (gbq == NULL || StringHasNoText (gbq->val)) return;

  /* worst case every input character turns into two output characters */
  buf = MemNew (sizeof (Char) * (StringLen (gbq->val) * 2 + 1));
  if (buf == NULL) return;

  in = gbq->val;
  out = buf;
  while (*in != '\0') {
    /* punctuation is copied through */
    for (; *in == '(' || *in == ')' || *in == ','; in++) {
      *out = *in;
      out++;
    }
    while (IS_WHITESP (*in)) {
      in++;
    }
    /* first number */
    for (; IS_DIGIT (*in); in++) {
      *out = *in;
      out++;
    }
    /* any run of dots and dashes becomes ".." */
    if (*in == '.' || *in == '-') {
      do {
        in++;
      } while (*in == '.' || *in == '-');
      *out = '.';
      out++;
      *out = '.';
      out++;
    }
    while (IS_WHITESP (*in)) {
      in++;
    }
    /* second number */
    for (; IS_DIGIT (*in); in++) {
      *out = *in;
      out++;
    }
    while (IS_WHITESP (*in)) {
      in++;
    }
    c = *in;
    if (c != '\0' && c != '(' && c != ')' && c != ',' && c != '.' &&
        (! IS_WHITESP (c)) && (! IS_DIGIT (c))) {
      numeric = FALSE;
      break;
    }
  }

  if (! numeric) {
    MemFree (buf);
    /* lower case the contents */
    for (p = gbq->val; *p != '\0'; p++) {
      if (IS_UPPER (*p)) {
        *p = TO_LOWER (*p);
      }
    }
    return;
  }

  *out = '\0';
  gbq->val = MemFree (gbq->val);
  gbq->val = buf;
  /* and lower case the contents */
  for (p = buf; *p != '\0'; p++) {
    if (IS_UPPER (*p)) {
      *p = TO_LOWER (*p);
    }
  }
}
4702 
/*
 * CleanupRptUnitSeq
 *
 * If gbq->val consists solely of the letters A, C, G, T, U (either case),
 * rewrites it in place as lower case with every u turned into t.  Any
 * other character leaves the value untouched.
 */
static void CleanupRptUnitSeq (GBQualPtr gbq)

{
  Char     c;
  CharPtr  p;

  if (gbq == NULL || StringHasNoText (gbq->val)) return;

  /* do not clean if val contains non-sequence characters */
  for (p = gbq->val; *p != '\0'; p++) {
    if (StringChr ("ACGTUacgtu", *p) == NULL) return;
  }

  /* lower case, and convert U to T */
  for (p = gbq->val; *p != '\0'; p++) {
    c = *p;
    if (IS_UPPER (c)) {
      c = TO_LOWER (c);
    }
    if (c == 'u') {
      c = 't';
    }
    if (c != *p) {
      *p = c;
    }
  }
}
4737 
/*
 * CleanupRptUnitRange
 *
 * Rewrites a range written with dashes ("1-10") into the ".." form
 * ("1..10").  The value is only touched when it consists solely of digits
 * and dashes with at least one dash; a value containing any '.' or any
 * other character is left as it is.  On allocation failure the value is
 * also left unchanged.
 *
 * gbq->val is replaced by a newly allocated string; the old one is freed.
 */
static void CleanupRptUnitRange (GBQualPtr gbq)

{
  Char     ch;
  size_t   dashes = 0;
  size_t   dots = 0;
  size_t   len;
  CharPtr  ptr;
  CharPtr  str;
  CharPtr  tmp;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  /* classify the characters; anything but digits, '-' and '.' aborts */
  ptr = gbq->val;
  ch = *ptr;
  while (ch != '\0') {
    if (ch == '-') {
      dashes++;
    } else if (ch == '.') {
      dots++;
    } else if (IS_DIGIT (ch)) {
      /* okay */
    } else return;
    ptr++;
    ch = *ptr;
  }

  if (dashes > 0 && dots == 0) {
    /* every dash grows by one character; the former
       StringLen (gbq->val + dashes) under-sized the buffer */
    len = StringLen (gbq->val) + dashes;
    str = (CharPtr) MemNew (sizeof (Char) * (len + 1));
    if (str == NULL) return;
    tmp = str;
    ptr = gbq->val;
    ch = *ptr;
    while (ch != '\0') {
      if (ch == '-') {
        *tmp = '.';
        tmp++;
        *tmp = '.';
        tmp++;
      } else {
        *tmp = ch;
        tmp++;
      }
      ptr++;
      ch = *ptr;
    }
    *tmp = '\0';
    gbq->val = MemFree (gbq->val);
    gbq->val = str;
  }
}
4788 
/*
 * CleanupReplace
 *
 * If gbq->val is made up only of A, C, G, T, U in either case, converts it
 * in place to lower case and replaces each u by t.  A value containing any
 * other character is not modified.
 */
static void CleanupReplace (GBQualPtr gbq)

{
  Char     base;
  CharPtr  cur;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  /* first pass: validate without changing anything */
  cur = gbq->val;
  while (*cur != '\0') {
    if (StringChr ("ACGTUacgtu", *cur) == NULL) return;
    cur++;
  }

  /* second pass: lower case, and convert U to T */
  cur = gbq->val;
  while (*cur != '\0') {
    base = *cur;
    if (IS_UPPER (base)) {
      base = TO_LOWER (base);
    }
    if (base == 'u') {
      base = 't';
    }
    if (base != *cur) {
      *cur = base;
    }
    cur++;
  }
}
4820 
/*
 * Category prefixes that may start an inference value, each including the
 * space after the colon.  The empty first entry matches any string.  The
 * list is NULL-terminated and its entries line up index-for-index with
 * evCategoryNoSpace.
 */
static CharPtr evCategoryPfx [] = {
  "",
  "COORDINATES: ",
  "DESCRIPTION: ",
  "EXISTENCE: ",
  NULL
};
4828 
/*
 * CleanupInference
 *
 * Compacts an inference qualifier value in place.  A leading category
 * prefix from evCategoryPfx (matched case-insensitively) is skipped and
 * left as it is; the rest of the string is processed in two passes:
 *
 *   1. spaces directly before a ':' and spaces directly after a ':' are
 *      removed;
 *   2. a "?|" that directly follows a ':' or ',' is removed.
 *
 * The string only ever shrinks, so no allocation is needed.
 */
static void CleanupInference (GBQualPtr gbq)

{
  Char     ch;
  CharPtr  colon;
  CharPtr  dst;
  Int2     j;
  size_t   len;
  CharPtr  ptr;
  CharPtr  skip;
  CharPtr  space;
  CharPtr  str;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  str = gbq->val;
  space = NULL;
  colon = NULL;

  /* find the category prefix; the empty first table entry always matches,
     and a later matching entry overrides it */
  skip = NULL;
  for (j = 0; evCategoryPfx [j] != NULL; j++) {
    len = StringLen (evCategoryPfx [j]);
    if (StringNICmp (str, evCategoryPfx [j], len) != 0) continue;
    skip = str + len;
  }
  if (skip != NULL) {
    str = skip;
  }

  /* pass 1: ptr reads, dst writes; space marks the start of the current run
     of spaces in the output, colon marks the last ':' written */
  dst = str;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    *dst = ch;
    if (ch == ' ') {
      if (space == NULL) {
        space = dst;
      }
    } else if (ch == ':') {
      /* back up over the spaces that preceded the colon */
      if (space != NULL) {
        dst = space;
        *dst = ch;
      }
      space = NULL;
      colon = dst;
    } else {
      /* first non-space after "colon + spaces": write it right after the colon */
      if (space != NULL && colon != NULL) {
        colon++;
        dst = colon;
        *dst = ch;
      }
      space = NULL;
      colon = NULL;
    }
    dst++;
    ptr++;
    ch = *ptr;
  }
  *dst = '\0';

  /* pass 2: drop "?|" after ':' or ','; *(ptr + 2) is only read when
     *(ptr + 1) is '?', so the read stays inside the string */
  dst = str;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    *dst = ch;
    if ((ch == ':' || ch == ',') && *(ptr + 1) == '?' && *(ptr + 2) == '|') {
      ptr += 2;
    }
    dst++;
    ptr++;
    ch = *ptr;
  }
  *dst = '\0';
}
4904 
/*
 * The same category prefixes as evCategoryPfx, in the same order, but without
 * the space after the colon.  RepairInference uses these to spot values whose
 * prefix is missing that space.  NULL-terminated.
 */
static CharPtr evCategoryNoSpace [] = {
  "",
  "COORDINATES:",
  "DESCRIPTION:",
  "EXISTENCE:",
  NULL
};
4912 
/*
 * Restores the space after a leading category prefix of an inference value,
 * e.g. "COORDINATES:xxx" becomes "COORDINATES: xxx".
 *
 * The value is compared (ignoring case) with each entry of evCategoryNoSpace.
 * If it starts with a prefix but not with the corresponding evCategoryPfx
 * entry (prefix plus space), gbq->val is replaced by a newly allocated string
 * made of the evCategoryPfx entry followed by the rest of the old value.
 *
 * Does nothing when gbq is NULL, the value has no text, no prefix needs
 * repair, or the allocation fails.
 */
static void RepairInference (GBQualPtr gbq)

{
  Int2     j;
  size_t   len;
  CharPtr  ptr;
  CharPtr  skip;
  CharPtr  str;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  str = gbq->val;
  for (j = 0; evCategoryNoSpace [j] != NULL; j++) {
    len = StringLen (evCategoryNoSpace [j]);
    /* the empty placeholder entry names no category; never let it count as a
       match, otherwise the scan would end here before real prefixes are tried */
    if (len == 0) continue;
    if (StringNICmp (str, evCategoryNoSpace [j], len) != 0) continue;
    /* prefix already followed by its space: nothing to repair for this entry */
    if (StringNICmp (str, evCategoryPfx [j], len + 1) == 0) continue;
    /* need to repair */
    skip = str + len;
    /* room for the remainder plus the longest prefix and the terminator */
    ptr = MemNew (StringLen (skip) + 20);
    if (ptr == NULL) return;
    StringCpy (ptr, evCategoryPfx [j]);
    StringCat (ptr, skip);
    gbq->val = MemFree (gbq->val);
    gbq->val = ptr;
    return;
  }
}
4941 
/*
 * Inserts a space after the comma that separates the two sites of a value
 * beginning with "(5'site:" and containing ",3'site:", so that
 * "(5'site:X,3'site:Y)" becomes "(5'site:X, 3'site:Y)".
 *
 * gbq->val is replaced by a newly allocated string.  Does nothing when gbq
 * is NULL, the value does not have the expected shape, or allocation fails.
 */
static void CleanupConsSplice (GBQualPtr gbq)

{
  size_t   len;
  CharPtr  ptr;
  CharPtr  str;

  /* same NULL guard as the sibling Cleanup... helpers */
  if (gbq == NULL) return;
  if (StringNICmp (gbq->val, "(5'site:", 8) != 0) return;
  ptr = StringStr (gbq->val, ",3'site:");
  if (ptr == NULL) return;
  len = StringLen (gbq->val) + 5;
  str = (CharPtr) MemNew (len);
  if (str == NULL) return;
  /* split the old value at the comma, then rejoin the halves with ", " */
  *ptr = '\0';
  ptr++;
  StringCpy (str, gbq->val);
  StringCat (str, ", ");
  StringCat (str, ptr);
  gbq->val = MemFree (gbq->val);
  gbq->val = str;
}
4963 
ExpandParenGroup(GBQualPtr headgbq)4964 static Boolean ExpandParenGroup (GBQualPtr headgbq)
4965 
4966 {
4967   Char       ch;
4968   GBQualPtr  lastgbq;
4969   size_t     len;
4970   Int2       nesting;
4971   GBQualPtr  newgbq;
4972   GBQualPtr  nextqual;
4973   CharPtr    ptr;
4974   CharPtr    str;
4975   CharPtr    tmp;
4976 
4977   nextqual = headgbq->next;
4978   lastgbq = headgbq;
4979   ptr = headgbq->val;
4980   tmp = StringSave (ptr + 1);
4981   len = StringLen (tmp);
4982   if (len > 0 && tmp [len - 1] == ')') {
4983     tmp [len - 1] = '\0';
4984   }
4985   str = tmp;
4986   nesting = 0;
4987   ptr = str;
4988   ch = *ptr;
4989   while (ch != '\0') {
4990     if (ch == '(') {
4991       nesting++;
4992     } else if (ch == ')') {
4993       nesting--;
4994       if (nesting < 0) {
4995         MemFree (tmp);
4996         return FALSE;
4997       }
4998     } else if (ch == ',') {
4999       if (nesting < 0) {
5000         MemFree (tmp);
5001         return FALSE;
5002       }
5003     }
5004     ptr++;
5005     ch = *ptr;
5006   }
5007   while (! StringHasNoText (str)) {
5008     ptr = StringChr (str, ',');
5009     if (ptr == NULL) {
5010       ptr = StringRChr (str, ')');
5011     }
5012     if (ptr != NULL) {
5013       *ptr = '\0';
5014       ptr++;
5015     }
5016     TrimSpacesAroundString (str);
5017     newgbq = GBQualNew ();
5018     if (newgbq != NULL) {
5019       newgbq->qual = StringSave (headgbq->qual);
5020       newgbq->val = StringSave (str);
5021       newgbq->next = nextqual;
5022       lastgbq->next = newgbq;
5023       lastgbq = newgbq;
5024     }
5025     str = ptr;
5026   }
5027   MemFree (tmp);
5028   return TRUE;
5029 }
5030 
IsBaseRange(CharPtr str)5031 static Boolean IsBaseRange (CharPtr str)
5032 
5033 {
5034   CharPtr   ptr;
5035   Char      tmp [32];
5036   long int  val;
5037 
5038   if (StringLen (str) > 25) return FALSE;
5039   StringNCpy_0 (tmp, str, sizeof (tmp));
5040   ptr = StringStr (tmp, "..");
5041   if (ptr == NULL) return FALSE;
5042   *ptr = '\0';
5043   if (StringHasNoText (tmp)) return FALSE;
5044   if (sscanf (tmp, "%ld", &val) != 1 || val < 1) return FALSE;
5045   ptr += 2;
5046   if (StringHasNoText (ptr)) return FALSE;
5047   if (sscanf (ptr, "%ld", &val) != 1 || val < 1) return FALSE;
5048   return TRUE;
5049 }
5050 
/*
 * Expands old-style grouped qualifier values on a feature.
 *
 * Every qualifier first has its name and value cleaned; a NULL name, a NULL
 * value or a value that is just quotes becomes the empty string.  For the
 * qualifiers rpt_unit_seq, rpt_type, rpt_unit, rpt_unit_range, replace,
 * compare, old_locus_tag and usedin, a value wrapped in braces is rewritten
 * to use parentheses, and a value wrapped in parentheses is split by
 * ExpandParenGroup into individual qualifiers, after which the original
 * grouped qualifier is removed.  Except for rpt_unit_seq, values containing
 * a further '(' inside the outer pair are not split.
 */
static void ModernizeFeatureGBQuals (SeqFeatPtr sfp)

{
  GBQualPtr       curr;
  GBQualPtr       following;
  Boolean         isRptUnitSeq;
  GBQualPtr PNTR  link;
  size_t          n;
  Boolean         splittable;
  CharPtr         text;

  if (sfp == NULL) return;

  /* link always addresses the pointer that leads to curr */
  link = (GBQualPtr PNTR) &(sfp->qual);
  for (curr = sfp->qual; curr != NULL; curr = following) {
    CleanVisString (&(curr->qual));
    CleanVisString (&(curr->val));
    if (curr->qual == NULL) {
      curr->qual = StringSave ("");
    }
    if (StringIsJustQuotes (curr->val)) {
      curr->val = MemFree (curr->val);
    }
    if (curr->val == NULL) {
      curr->val = StringSave ("");
    }
    following = curr->next;

    isRptUnitSeq = (Boolean) (StringICmp (curr->qual, "rpt_unit_seq") == 0);
    splittable = (Boolean) (isRptUnitSeq ||
                            StringICmp (curr->qual, "rpt_type") == 0 ||
                            StringICmp (curr->qual, "rpt_unit") == 0 ||
                            StringICmp (curr->qual, "rpt_unit_range") == 0 ||
                            StringICmp (curr->qual, "replace") == 0 ||
                            StringICmp (curr->qual, "compare") == 0 ||
                            StringICmp (curr->qual, "old_locus_tag") == 0 ||
                            StringICmp (curr->qual, "usedin") == 0);

    if (splittable) {
      text = curr->val;
      n = StringLen (text);
      if (n > 1 && *text == '{' && text [n - 1] == '}') {
        *text = '(';
        text [n - 1] = ')';
      }
      /* rpt_unit_seq may contain nested parentheses; the others may not */
      if (n > 1 && *text == '(' && text [n - 1] == ')' &&
          (isRptUnitSeq || StringChr (text + 1, '(') == NULL) &&
          ExpandParenGroup (curr)) {
        /* individual parsed out (xxx,xxx) qualifiers will be processed next, now get rid of original */
        following = curr->next;
        *link = curr->next;
        curr->next = NULL;
        curr->qual = MemFree (curr->qual);
        curr->val = MemFree (curr->val);
        GBQualFree (curr);
        continue;
      }
    }

    link = (GBQualPtr PNTR) &(curr->next);
  }
}
5136 
5137 
/*
 * Brings a satellite qualifier value into the form "<type>:<name>".
 *
 * If the value does not begin with "microsatellite", "minisatellite" or
 * "satellite" (case-sensitive), it is replaced by a newly allocated
 * "satellite:<old value>".  If it does begin with one of them and the next
 * character is a space, that space becomes a colon.  Finally, any spaces
 * directly after the first colon are removed in place.
 *
 * Does nothing when satellite is NULL or *satellite has no text.  If the
 * replacement string cannot be allocated the value is left untouched.
 */
static void MendSatelliteQualifier (CharPtr PNTR satellite)
{
  Int4 microsatellite_len = StringLen ("microsatellite");
  Int4 minisatellite_len = StringLen ("minisatellite");
  Int4 satellite_len = StringLen ("satellite");
  Int4 type_len = 0;
  CharPtr new_qual, colon, src, dst;

  if (satellite == NULL || StringHasNoText (*satellite)) {
    return;
  }

  if (StringNCmp (*satellite, "microsatellite", microsatellite_len) == 0) {
    type_len = microsatellite_len;
  } else if (StringNCmp (*satellite, "minisatellite", minisatellite_len) == 0) {
    type_len = minisatellite_len;
  } else if (StringNCmp (*satellite, "satellite", satellite_len) == 0) {
    type_len = satellite_len;
  }

  if (type_len == 0) {
    /* no recognized type word: prepend "satellite:" */
    new_qual = (CharPtr) MemNew (sizeof (Char) * (StringLen (*satellite) + satellite_len + 3));
    if (new_qual == NULL) {
      /* allocation failed: keep the original value instead of writing through NULL */
      return;
    }
    sprintf (new_qual, "satellite:%s", *satellite);
    *satellite = MemFree (*satellite);
    *satellite = new_qual;
  } else if (*(*satellite + type_len) == ' ') {
    *(*satellite + type_len) = ':';
  }

  /* remove spaces after colon */
  colon = StringChr (*satellite, ':');
  if (colon != NULL) {
    src = colon + 1;
    dst = colon + 1;
    while (*src == ' ') {
      src++;
    }
    /* shift the remainder left over the skipped spaces */
    while (*src != 0) {
      *dst = *src;
      dst++;
      src++;
    }
    *dst = 0;
  }
}
5183 
5184 
/*
 * Walks the GBQual list of a feature and cleans it up in place.
 *
 * Each qualifier has its name and value normalized first.  A long if/else
 * chain then decides what to do with it:
 *   - qualifiers that map onto dedicated feature fields (partial, exception,
 *     note/notes/comment, label, db_xref, gdb_xref, pseudo, gene, the
 *     exception-text synonyms) are transferred to those fields and removed
 *     from the list;
 *   - qualifiers that stay (replace, rpt_unit*, EC_number, pseudogene,
 *     inference, satellite, ...) have their value cleaned and/or their name
 *     modernized (transposon and insertion_seq become mobile_element, which
 *     in turn becomes mobile_element_type; rpt_unit becomes rpt_unit_seq or
 *     rpt_unit_range);
 *   - feature-type specific handlers (HandledGBQualOnGene/CDS/RNA/Prot/Imp)
 *     get a chance to consume the qualifier.
 *
 * unlink starts out TRUE for every qualifier; branches that want to keep the
 * qualifier set it to FALSE.  Qualifiers with unlink still TRUE at the end of
 * the iteration are removed from the list and freed.
 *
 * sfp          feature whose qualifiers are cleaned; NULL is ignored
 * isEmblOrDdbj passed through unchanged to HandledGBQualOnRNA
 */
static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)

{
  ValNodePtr      afterMe = NULL;
  Boolean         all_digits;
  Char            ch;
  DbtagPtr        db;
  GBQualPtr       gbq;
  GeneRefPtr      grp;
  ImpFeatPtr      ifp;
  size_t          len;
  GBQualPtr       nextqual;
  ObjectIdPtr     oip;
  GBQualPtr PNTR  prevqual;
  CharPtr         ptr;
  GBQualPtr       rpt_unit_range = NULL;
  GBQualPtr       rpt_unit_seq = NULL;
  CharPtr         str;
  CharPtr         tag;
  Boolean         unlink;
  ValNodePtr      vnp;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL) return;
  gbq = sfp->qual;
  /* prevqual addresses the pointer that leads to gbq, so gbq can be unlinked */
  prevqual = (GBQualPtr PNTR) &(sfp->qual);
  while (gbq != NULL) {
    /* normalize name and value; make sure neither is NULL from here on */
    CleanVisString (&(gbq->qual));
    CleanVisStringAndCompress (&(gbq->val));
    if (gbq->qual == NULL) {
      gbq->qual = StringSave ("");
    }
    if (StringIsJustQuotes (gbq->val)) {
      gbq->val = MemFree (gbq->val);
    }
    if (gbq->val == NULL) {
      gbq->val = StringSave ("");
    }
    /* replace on an import feature with key "variation": lower-case the whole value */
    if (StringICmp (gbq->qual, "replace") == 0) {
      if (sfp->data.choice == SEQFEAT_IMP) {
        ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
        if (ifp != NULL) {
          if (StringICmp (ifp->key, "variation") == 0 && gbq->val != NULL) {
            ptr = gbq->val;
            ch = *ptr;
            while (ch != '\0') {
              *ptr = TO_LOWER (ch);
              ptr++;
              ch = *ptr;
            }
          }
        }
      }
    }
    nextqual = gbq->next;
    /* default is to remove the qualifier; branches that keep it clear the flag */
    unlink = TRUE;
    if (StringICmp (gbq->qual, "partial") == 0) {
      sfp->partial = TRUE;
    } else if (StringICmp (gbq->qual, "evidence") == 0) {
      /* evidence qualifiers are dropped; the mapping to exp_ev is disabled */
      /*
      if (StringICmp (gbq->val, "experimental") == 0) {
        if (sfp->exp_ev != 2) {
          sfp->exp_ev = 1;
        }
      } else if (StringICmp (gbq->val, "not_experimental") == 0) {
        sfp->exp_ev = 2;
      }
      */
    } else if (StringICmp (gbq->qual, "exception") == 0) {
      /* set the exception flag; keep the text unless it is just "TRUE" or text is already present */
      sfp->excpt = TRUE;
      if (! HasNoText (gbq->val)) {
        if (StringICmp (gbq->val, "TRUE") != 0) {
          if (sfp->except_text == NULL) {
            sfp->except_text = StringSaveNoNull (gbq->val);
          }
        }
      }
    } else if (StringICmp (gbq->qual, "note") == 0 ||
               StringICmp (gbq->qual, "notes") == 0 ||
               StringICmp (gbq->qual, "comment") == 0) {
      if (sfp->comment == NULL) {
        /* hand the value string over to the feature instead of copying it */
        sfp->comment = gbq->val;
        gbq->val = NULL;
      } else {
        /* append to the existing comment, separated by "; " */
        len = StringLen (sfp->comment) + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, sfp->comment);
        StringCat (str, "; ");
        StringCat (str, gbq->val);
        sfp->comment = MemFree (sfp->comment);
        sfp->comment = str;
      }
    } else if (StringICmp (gbq->qual, "label") == 0) {
      if (StringICmp (gbq->val, FindKeyFromFeatDefType (sfp->idx.subtype, FALSE)) == 0) {
        /* skip label that is simply the feature key */
      } else if (sfp->comment == NULL || StringISearch (sfp->comment, gbq->qual) == NULL) {
        /* NOTE(review): the search above looks for the qualifier name (gbq->qual),
           not the label text (gbq->val) - confirm this is intended */
        /* if label is not already in comment, append */
        len = StringLen (sfp->comment) + StringLen (gbq->val) + StringLen ("label: ") + 5;
        str = MemNew (sizeof (Char) * len);
        if (sfp->comment == NULL) {
          StringCpy (str, "label: ");
          StringCat (str, gbq->val);
          sfp->comment = str;
        } else {
          StringCpy (str, sfp->comment);
          StringCat (str, "; ");
          StringCat (str, "label: ");
          StringCat (str, gbq->val);
          sfp->comment = MemFree (sfp->comment);
          sfp->comment = str;
        }
      }
    } else if (StringICmp (gbq->qual, "db_xref") == 0) {
      /* "db:id" becomes a Dbtag pushed onto the front of sfp->dbxref */
      tag = gbq->val;
      ptr = StringChr (tag, ':');
      if (ptr != NULL) {
        vnp = ValNodeNew (NULL);
        db = DbtagNew ();
        vnp->data.ptrvalue = db;
        *ptr = '\0';
        ptr++;
        db->db = StringSave (tag);
        oip = ObjectIdNew ();
        oip->str = StringSave (ptr);
        db->tag = oip;
        vnp->next = sfp->dbxref;
        sfp->dbxref = vnp;
      } else {
        /* no colon: the qualifier is kept as it is */
        /*
        db->db = StringSave ("?");
        oip = ObjectIdNew ();
        oip->str = StringSave (tag);
        db->tag = oip;
        vnp->next = sfp->dbxref;
        sfp->dbxref = vnp;
        */
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "gdb_xref") == 0) {
      /* the whole value becomes the id of a Dbtag with database "GDB" */
      vnp = ValNodeNew (NULL);
      db = DbtagNew ();
      vnp->data.ptrvalue = db;
      db->db = StringSave ("GDB");
      oip = ObjectIdNew ();
      oip->str = StringSave (gbq->val);
      db->tag = oip;
      vnp->next = sfp->dbxref;
      sfp->dbxref = vnp;
    } else if (StringICmp (gbq->qual, "cons_splice") == 0) {
      /* cons_splice qualifiers are removed; the cleanup call is disabled */
      /*
      CleanupConsSplice (gbq);
      unlink = FALSE;
      */
    } else if (StringICmp (gbq->qual, "replace") == 0) {
      CleanupReplace (gbq);
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "rpt_unit_seq") == 0) {
      /* a value that is really a base range is renamed to rpt_unit_range */
      if (IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_range");
        CleanupRptUnitRange (gbq);
      } else {
        CleanupRptUnitSeq (gbq);
      }
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "rpt_unit_range") == 0) {
      /* and the other way round: a value that is not a base range becomes rpt_unit_seq */
      if (! IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_seq");
        CleanupRptUnitSeq (gbq);
      } else {
        CleanupRptUnitRange (gbq);
      }
      unlink = FALSE;
    /* feature-type specific handlers: when one returns TRUE the branch body is
       empty and unlink stays TRUE, so the qualifier is removed below */
    } else if (sfp->data.choice == SEQFEAT_GENE && HandledGBQualOnGene (sfp, gbq)) {
    } else if (sfp->data.choice == SEQFEAT_CDREGION && HandledGBQualOnCDS (sfp, gbq, &afterMe)) {
    } else if (sfp->data.choice == SEQFEAT_RNA && HandledGBQualOnRNA (sfp, gbq, isEmblOrDdbj)) {
    } else if (sfp->data.choice == SEQFEAT_PROT && HandledGBQualOnProt (sfp, gbq)) {
    } else if (sfp->data.choice == SEQFEAT_IMP && HandledGBQualOnImp (sfp, gbq)) {
    } else if (StringICmp (gbq->qual, "rpt_unit") == 0) {
      /* legacy rpt_unit is renamed according to what its value looks like */
      if (IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_range");
        unlink = FALSE;
      } else {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_seq");
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "EC_number") == 0) {
      CleanupECNumber (gbq->val);
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "pseudo") == 0) {
      sfp->pseudo = TRUE;
    } else if (StringICmp (gbq->qual, "pseudogene") == 0) {
      /* a recognized pseudogene value sets the pseudo flag and is lower-cased; the qualifier is kept */
      str = gbq->val;
      if (StringICmp (str, "processed") == 0 ||
          StringICmp (str, "unprocessed") == 0 ||
          StringICmp (str, "unitary") == 0 ||
          StringICmp (str, "allelic") == 0 ||
          StringICmp (str, "unknown") == 0) {
        sfp->pseudo = TRUE;
        ptr = str;
        ch = *ptr;
        while (ch != '\0') {
          if (IS_UPPER (ch)) {
            *ptr = TO_LOWER (ch);
          }
          ptr++;
          ch = *ptr;
        }
      }
      unlink = FALSE;
    /* the next three branches turn exception-like qualifiers into the exception
       flag; the standard text is stored only when the qualifier has no value
       and no exception text is present yet */
    } else if (StringICmp (gbq->qual, "ribosomal_slippage") == 0 ||
               StringICmp (gbq->qual, "ribosomal-slippage") == 0 ||
               StringICmp (gbq->qual, "ribosomal slippage") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("ribosomal slippage");
        }
      }
    } else if (StringICmp (gbq->qual, "trans_splicing") == 0 ||
               StringICmp (gbq->qual, "trans-splicing") == 0 ||
               StringICmp (gbq->qual, "trans splicing") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("trans-splicing");
        }
      }
    } else if (StringICmp (gbq->qual, "artificial_location") == 0 ||
               StringICmp (gbq->qual, "artificial-location") == 0 ||
               StringICmp (gbq->qual, "artificial location") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("artificial location");
        }
      }
    } else if (StringICmp (gbq->qual, "gene") == 0 && (! StringHasNoText (gbq->val))) {
      /* turn the gene qualifier into a gene xref pushed onto the front of sfp->xref */
      grp = GeneRefNew ();
      grp->locus = StringSave (gbq->val);
      xref = SeqFeatXrefNew ();
      xref->data.choice = SEQFEAT_GENE;
      xref->data.value.ptrvalue = (Pointer) grp;
      xref->specialCleanupFlag = TRUE; /* flag to test for overlapping gene later */
      xref->next = sfp->xref;
      sfp->xref = xref;
    } else if (sfp->data.choice != SEQFEAT_CDREGION && StringICmp (gbq->qual, "codon_start") == 0) {
      /* not legal on anything but CDS, so remove it */
    } else if (StringICmp (gbq->qual, "experiment") == 0 &&
               StringICmp (gbq->val, "experimental evidence, no additional details recorded") == 0) {
      /* remove default experiment string if instantiated */
    } else if (StringICmp (gbq->qual, "inference") == 0) {
      if (StringICmp (gbq->val, "non-experimental evidence, no additional details recorded") == 0) {
        /* remove default inference string if instantiated */
      } else {
        CleanupInference (gbq);
        RepairInference (gbq);
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "transposon") == 0) {
      /* transposon becomes mobile_element with a typed value */
      if (StringICmp (gbq->val, "class I integron") == 0 ||
          StringICmp (gbq->val, "class II integron") == 0 ||
          StringICmp (gbq->val, "class III integron") == 0 ||
          StringICmp (gbq->val, "class 1 integron") == 0 ||
          StringICmp (gbq->val, "class 2 integron") == 0 ||
          StringICmp (gbq->val, "class 3 integron") == 0) {
        /* "class N integron" -> "integron:class N" */
        len = StringLen ("integron") + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, "integron");
        StringCat (str, ":");
        ptr = StringStr (gbq->val, " integron");
        if (ptr != NULL) {
          *ptr = '\0';
        }
        StringCat (str, gbq->val);
        gbq->val = MemFree (gbq->val);
        gbq->val = str;
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("mobile_element");
        unlink = FALSE;
      } else {
        /* anything else -> "transposon:<value>" */
        len = StringLen ("transposon") + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, "transposon");
        StringCat (str, ":");
        StringCat (str, gbq->val);
        gbq->val = MemFree (gbq->val);
        gbq->val = str;
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("mobile_element");
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "insertion_seq") == 0) {
      /* insertion_seq -> mobile_element with value "insertion sequence:<value>" */
      len = StringLen ("insertion sequence") + StringLen (gbq->val) + 5;
      str = MemNew (sizeof (Char) * len);
      StringCpy (str, "insertion sequence");
      StringCat (str, ":");
      StringCat (str, gbq->val);
      gbq->val = MemFree (gbq->val);
      gbq->val = str;
      gbq->qual = MemFree (gbq->qual);
      gbq->qual = StringSave ("mobile_element");
      unlink = FALSE;
    } else if (StringCmp (gbq->qual, "satellite") == 0) {
      /* note: this comparison is case-sensitive, unlike the others in this chain */
      MendSatelliteQualifier(&(gbq->val));
      unlink = FALSE;
    } else {
      unlink = FALSE;
    }

    /* mobile_element on a repeat_region import feature: rename the qualifier
       and change the feature itself into a mobile_element feature */
    if (StringICmp (gbq->qual, "mobile_element") == 0) {
      if (sfp->data.choice == SEQFEAT_IMP) {
        ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
        if (ifp != NULL) {
          if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) {
            gbq->qual = MemFree (gbq->qual);
            gbq->qual = StringSave ("mobile_element_type");
            ifp->key = MemFree (ifp->key);
            ifp->key = StringSave ("mobile_element");
            sfp->idx.subtype = FEATDEF_mobile_element;
          }
        }
      }
    }
    /* any remaining mobile_element qualifier is renamed as well */
    if (StringICmp (gbq->qual, "mobile_element") == 0) {
      gbq->qual = MemFree (gbq->qual);
      gbq->qual = StringSave ("mobile_element_type");
    }
    /* remove spaces around the first colon of a mobile_element_type value */
    if (StringICmp (gbq->qual, "mobile_element_type") == 0) {
      if (StringStr (gbq->val, " :") != NULL || StringStr (gbq->val, ": ") != NULL) {
        len = StringLen (gbq->val) + 5;
        ptr = StringChr (gbq->val, ':');
        if (ptr != NULL) {
          *ptr = '\0';
          ptr++;
          TrimSpacesAroundString (gbq->val);
          TrimSpacesAroundString (ptr);
          str = MemNew (sizeof (Char) * len);
          StringCpy (str, gbq->val);
          StringCat (str, ":");
          StringCat (str, ptr);
          gbq->val = MemFree (gbq->val);
          gbq->val = str;
        }
      }
    }

    /* estimated_length must be all digits; anything else becomes "unknown" */
    if (StringICmp (gbq->qual, "estimated_length") == 0) {
      all_digits = TRUE;
      ptr = gbq->val;
      if (ptr != NULL) {
        ch = *ptr;
        while (ch != '\0') {
          if (! IS_DIGIT (ch)) {
            all_digits = FALSE;
          }
          ptr++;
          ch = *ptr;
        }
      }
      if (! all_digits) {
        if (StringICmp (gbq->val, "unknown") != 0) {
          MemFree (gbq->val);
          gbq->val = StringSave ("unknown");
        }
      }
    }

    /* an import feature with key "conflict" is turned into misc_difference and
       "conflict" is put at the front of the comment.  This sits inside the
       qualifier loop, so it only runs for features that have a qualifier */
    if (sfp->data.choice == SEQFEAT_IMP) {
      ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      if (ifp != NULL) {
        if (StringICmp (ifp->key, "conflict") == 0 ) {
          ifp->key = MemFree (ifp->key);
          ifp->key = StringSave ("misc_difference");
          sfp->idx.subtype = FEATDEF_misc_difference;
          len = StringLen (sfp->comment) + StringLen ("conflict") + 5;
          str = MemNew (sizeof (Char) * len);
          if (sfp->comment == NULL) {
            StringCpy (str, "conflict");
            sfp->comment = str;
          } else {
            StringCpy (str, "conflict; ");
            StringCat (str, sfp->comment);
            sfp->comment = MemFree (sfp->comment);
            sfp->comment = str;
          }
        }
      }
    }

    /* NOTE(review): rpt_unit_seq and rpt_unit_range are initialized to NULL and
       never assigned in this function, so these two calls do not execute */
    if (rpt_unit_seq != NULL) {
      CleanupRptUnit (rpt_unit_seq);
    }
    if (rpt_unit_range != NULL) {
      CleanupRptUnit (rpt_unit_range);
    }

    /* a qualifier with neither a name nor a value is always removed */
    if (StringHasNoText (gbq->qual) && StringHasNoText (gbq->val)) {
      unlink = TRUE;
    }

    if (unlink) {
      /* splice gbq out of the list; prevqual keeps addressing the same link */
      *(prevqual) = gbq->next;
      gbq->next = NULL;
      gbq->qual = MemFree (gbq->qual);
      gbq->val = MemFree (gbq->val);
      GBQualFree (gbq);
    } else {
      prevqual = (GBQualPtr PNTR) &(gbq->next);
    }
    gbq = nextqual;
  }
}
5601 
SortByGBQualKeyAndVal(VoidPtr ptr1,VoidPtr ptr2)5602 static int LIBCALLBACK SortByGBQualKeyAndVal (VoidPtr ptr1, VoidPtr ptr2)
5603 
5604 {
5605   int        compare;
5606   GBQualPtr  gbq1;
5607   GBQualPtr  gbq2;
5608   CharPtr    str1;
5609   CharPtr    str2;
5610 
5611   if (ptr1 == NULL || ptr2 == NULL) return 0;
5612   gbq1 = *((GBQualPtr PNTR) ptr1);
5613   gbq2 = *((GBQualPtr PNTR) ptr2);
5614   if (gbq1 == NULL || gbq2 == NULL) return 0;
5615   str1 = (CharPtr) gbq1->qual;
5616   str2 = (CharPtr) gbq2->qual;
5617   if (str1 == NULL || str2 == NULL) return 0;
5618   compare = StringICmp (str1, str2);
5619   if (compare != 0) return compare;
5620   str1 = (CharPtr) gbq1->val;
5621   str2 = (CharPtr) gbq2->val;
5622   if (str1 == NULL || str2 == NULL) return 0;
5623   compare = StringICmp (str1, str2);
5624   return compare;
5625 }
5626 
GBQualsAlreadyInOrder(GBQualPtr list)5627 static Boolean GBQualsAlreadyInOrder (GBQualPtr list)
5628 
5629 {
5630   int        compare;
5631   GBQualPtr  curr;
5632   GBQualPtr  next;
5633 
5634   if (list == NULL || list->next == NULL) return TRUE;
5635   curr = list;
5636   next = curr->next;
5637   while (next != NULL) {
5638     compare = StringICmp (curr->qual, next->qual);
5639     if (compare > 0) return FALSE;
5640     if (compare == 0) {
5641       compare = StringICmp (curr->val, next->val);
5642       if (compare > 0) return FALSE;
5643     }
5644     curr = next;
5645     next = curr->next;
5646   }
5647   return TRUE;
5648 }
5649 
SortFeatureGBQuals(GBQualPtr list)5650 NLM_EXTERN GBQualPtr SortFeatureGBQuals (GBQualPtr list)
5651 
5652 {
5653   size_t     count, i;
5654   GBQualPtr  gbq, PNTR head;
5655 
5656   if (list == NULL) return NULL;
5657   if (GBQualsAlreadyInOrder (list)) return list;
5658 
5659   for (gbq = list, count = 0; gbq != NULL; gbq = gbq->next, count++) continue;
5660   head = MemNew (sizeof (GBQualPtr) * (count + 1));
5661 
5662   for (gbq = list, i = 0; gbq != NULL && i < count; i++) {
5663     head [i] = gbq;
5664     gbq = gbq->next;
5665   }
5666 
5667   StableMergeSort (head, count, sizeof (GBQualPtr), SortByGBQualKeyAndVal);
5668 
5669   for (i = 0; i < count; i++) {
5670     gbq = head [i];
5671     gbq->next = head [i + 1];
5672   }
5673 
5674   list = head [0];
5675   MemFree (head);
5676 
5677   return list;
5678 }
5679 
CleanupDuplicateGBQuals(GBQualPtr PNTR prevgbq)5680 NLM_EXTERN void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq)
5681 
5682 {
5683   GBQualPtr  gbq;
5684   GBQualPtr  last = NULL;
5685   GBQualPtr  next;
5686   Boolean    unlink;
5687 
5688   if (prevgbq == NULL) return;
5689   gbq = *prevgbq;
5690   while (gbq != NULL) {
5691     next = gbq->next;
5692     unlink = FALSE;
5693     if (last != NULL) {
5694       if (StringICmp (last->qual, gbq->qual) == 0 &&
5695           StringICmp (last->val, gbq->val) == 0) {
5696         unlink = TRUE;
5697       }
5698     } else {
5699       last = gbq;
5700     }
5701     if (unlink) {
5702       *prevgbq = gbq->next;
5703       gbq->next = NULL;
5704       GBQualFree (gbq);
5705     } else {
5706       last = gbq;
5707       prevgbq = (GBQualPtr PNTR) &(gbq->next);
5708     }
5709     gbq = next;
5710   }
5711 }
5712 
/* this identifies gbquals that should have been placed into special fields */

/* number of entries in illegalGbqualList; keep the two in step */
#define NUM_ILLEGAL_QUALS 14

/* StringICmp use of TO_UPPER means translation should go before transl_XXX */

/*
 * QualifierIsIllegal runs a binary search over this table using StringICmp,
 * so the entries must stay sorted in the order StringICmp produces.
 */
static CharPtr illegalGbqualList [NUM_ILLEGAL_QUALS] = {
  "anticodon",
  "citation",
  "codon_start",
  "db_xref",
  "evidence",
  "exception",
  "gene",
  "note",
  "protein_id",
  "pseudo",
  "transcript_id",
  "translation",
  "transl_except",
  "transl_table",
};
5735 
QualifierIsIllegal(CharPtr qualname)5736 static Int2 QualifierIsIllegal (CharPtr qualname)
5737 
5738 {
5739   Int2  L, R, mid;
5740 
5741   if (qualname == NULL || *qualname == '\0') return FALSE;
5742 
5743   L = 0;
5744   R = NUM_ILLEGAL_QUALS - 1;
5745 
5746   while (L < R) {
5747     mid = (L + R) / 2;
5748     if (StringICmp (illegalGbqualList [mid], qualname) < 0) {
5749       L = mid + 1;
5750     } else {
5751       R = mid;
5752     }
5753   }
5754 
5755   if (StringICmp (illegalGbqualList [R], qualname) == 0) {
5756     return TRUE;
5757   }
5758 
5759   return FALSE;
5760 }
5761 
/*
 * Appends qual (which may itself be the start of a chain) to the end of the
 * list whose head pointer is *head.  An empty list simply takes qual as its
 * head.  Does nothing when head or qual is NULL.
 */
static void GbqualLink (GBQualPtr PNTR head, GBQualPtr qual)

{
  GBQualPtr  tail;

  if (head == NULL || qual == NULL) return;

  if (*head == NULL) {
    *head = qual;
    return;
  }

  /* walk to the last node and hang qual off it */
  for (tail = *head; tail->next != NULL; tail = tail->next) continue;
  tail->next = qual;
}
5778 
/*
 * Reorders a GBQual list so that every qualifier for which
 * QualifierIsIllegal is true comes after all the others.  The relative order
 * inside each of the two groups is preserved.  Returns the new list head.
 */
static GBQualPtr SortIllegalGBQuals (GBQualPtr list)

{
  GBQualPtr  curr, following, keep = NULL, misplaced = NULL;

  /* detach each node and append it to the group it belongs to */
  for (curr = list; curr != NULL; curr = following) {
    following = curr->next;
    curr->next = NULL;
    GbqualLink (QualifierIsIllegal (curr->qual) ? &misplaced : &keep, curr);
  }

  /* illegal qualifiers go after the legal ones */
  GbqualLink (&keep, misplaced);
  return keep;
}
5798 
IsSubString(CharPtr str1,CharPtr str2)5799 static Boolean IsSubString (CharPtr str1, CharPtr str2)
5800 
5801 {
5802   Char    ch;
5803   size_t  len1, len2;
5804 
5805   len1 = StringLen (str1);
5806   len2 = StringLen (str2);
5807   if (len1 >= len2) return FALSE;
5808   if (StringNICmp (str1, str2, len1) != 0) return FALSE;
5809   ch = str2 [len1];
5810   if (IS_ALPHANUM (ch)) return FALSE;
5811   return TRUE;
5812 }
5813 
SortByOrgModSubtype(VoidPtr ptr1,VoidPtr ptr2)5814 static int LIBCALLBACK SortByOrgModSubtype (VoidPtr ptr1, VoidPtr ptr2)
5815 
5816 {
5817   int        compare;
5818   OrgModPtr  omp1;
5819   OrgModPtr  omp2;
5820   CharPtr    str1;
5821   CharPtr    str2;
5822 
5823   if (ptr1 == NULL || ptr2 == NULL) return 0;
5824   omp1 = *((OrgModPtr PNTR) ptr1);
5825   omp2 = *((OrgModPtr PNTR) ptr2);
5826   if (omp1 == NULL || omp2 == NULL) return 0;
5827   if (omp1->subtype > omp2->subtype) {
5828     return 1;
5829   } else if (omp1->subtype < omp2->subtype) {
5830     return -1;
5831   }
5832   str1 = (CharPtr) omp1->subname;
5833   str2 = (CharPtr) omp2->subname;
5834   if (str1 == NULL || str2 == NULL) return 0;
5835   compare = StringICmp (str1, str2);
5836   return compare;
5837 }
5838 
OrgModsAlreadyInOrder(OrgModPtr list)5839 static Boolean OrgModsAlreadyInOrder (OrgModPtr list)
5840 
5841 {
5842   int        compare;
5843   OrgModPtr  curr;
5844   OrgModPtr  next;
5845   CharPtr    str1;
5846   CharPtr    str2;
5847 
5848   if (list == NULL || list->next == NULL) return TRUE;
5849   curr = list;
5850   next = curr->next;
5851   while (next != NULL) {
5852     if (curr->subtype > next->subtype) return FALSE;
5853     str1 = (CharPtr) curr->subname;
5854     str2 = (CharPtr) next->subname;
5855     compare = StringICmp (str1, str2);
5856     if (compare > 0) return FALSE;
5857     curr = next;
5858     next = curr->next;
5859   }
5860   return TRUE;
5861 }
5862 
/*
 * SortOrgModList
 *
 * Returns the OrgMod list reordered by SortByOrgModSubtype (stable merge
 * sort).  A list that OrgModsAlreadyInOrder accepts is returned untouched.
 * The nodes themselves are relinked; nothing is allocated or freed apart
 * from a temporary pointer array.
 *
 * If the temporary array cannot be allocated the list is returned in its
 * original order instead of dereferencing a NULL pointer.
 */
static OrgModPtr SortOrgModList (OrgModPtr list)

{
  size_t     count, i;
  OrgModPtr  omp, PNTR head;

  if (list == NULL) return NULL;
  if (OrgModsAlreadyInOrder (list)) return list;

  for (omp = list, count = 0; omp != NULL; omp = omp->next, count++) continue;

  /* one extra slot: the relinking loop reads head [count] as the final next
     pointer, which relies on MemNew handing back zero-filled memory */
  head = MemNew (sizeof (OrgModPtr) * (count + 1));
  if (head == NULL) return list;

  for (omp = list, i = 0; omp != NULL && i < count; i++) {
    head [i] = omp;
    omp = omp->next;
  }

  StableMergeSort (head, count, sizeof (OrgModPtr), SortByOrgModSubtype);

  /* rebuild the chain in sorted order */
  for (i = 0; i < count; i++) {
    omp = head [i];
    omp->next = head [i + 1];
  }

  list = head [0];
  MemFree (head);

  return list;
}
5892 
5893 
/*
 * RemoveSpaceBeforeAndAfterColon
 *
 * Edits str in place so that every ':' has no whitespace immediately
 * before or after it ("a : b" becomes "a:b").  Strings without text are
 * left alone.
 *
 * Characters are cast to unsigned char before being handed to isspace,
 * because passing a negative plain char is undefined behaviour.  The
 * backward scan inspects cp [-1] instead of forming a pointer to
 * (str - 1) when the colon is the first character.
 */
static void RemoveSpaceBeforeAndAfterColon (CharPtr str)
{
  CharPtr pColon, cp, src, dst;

  if (StringHasNoText (str)) {
    return;
  }

  pColon = StringChr (str, ':');
  while (pColon != NULL) {
    /* cp ends up just past the last non-space character before the colon,
       or at str when only whitespace (or nothing) precedes it */
    cp = pColon;
    while (cp > str && isspace ((unsigned char) cp [-1])) {
      cp--;
    }
    *cp = ':';
    dst = cp + 1;

    /* skip whitespace that follows the original colon */
    src = pColon + 1;
    while (isspace ((unsigned char) *src)) {
      src++;
    }

    /* the colon now lives at cp; close the gap if anything was dropped */
    pColon = cp;
    if (src != dst) {
      while (*src != 0) {
        *dst = *src;
        dst++; src++;
      }
      *dst = 0;
    }
    pColon = StringChr (pColon + 1, ':');
  }
}
5929 
/*
 * CorrectTildes
 *
 * Applies a fixed list of literal find/replace corrections to *str through
 * FindReplaceString.  Each entry either doubles a '~' or replaces a stray
 * '~' in one specific known phrase; one entry restores a missing author
 * name.  The corrections run in table order.  The whole body is compiled
 * out when OS_MSWIN is defined.
 */
static void CorrectTildes (
  CharPtr PNTR str
)

{
#ifndef OS_MSWIN
  /* { text to find, replacement } */
  static CharPtr  tildeFixes [] [2] = {
    {"were ~25 cm in height (~3 weeks)", "were ~~25 cm in height (~~3 weeks)"},
    {"generally ~3 weeks", "generally ~~3 weeks"},
    {"sequencing (~4 96-well plates)", "sequencing (~~4 96-well plates)"},
    {"size distribution (~2 kb)", "size distribution (~~2 kb)"},
    {"sequencing (~3 96-well plates)", "sequencing (~~3 96-well plates)"},
    {"vector. 1~2 ul of ligated", "vector. 1~~2 ul of ligated"},
    /* disabled: "Lambda FLC I.~Islet cells were provided" -> "Lambda FLC I.~~Islet cells were provided" */
    {"different strains~of mice", "different strains of mice"},
    {"oligo-dT-NotI primer~(5'-biotin", "oligo-dT-NotI primer (5'-biotin"},
    {"sizes of 200~800 bp were purified", "sizes of 200~~800 bp were purified"},
    {"Tween 20 (~50 ml per tree)", "Tween 20 (~~50 ml per tree)"},
    {"the SMART approach (~http://www.evrogen.com", "the SMART approach (http://www.evrogen.com"},
    {"the morning (~10 am) with", "the morning (~~10 am) with"},
    {"(host) sequences (~10%)", "(host) sequences (~~10%)"},
    /* disabled: "unidirectionally.~ High quality" -> "unidirectionally. High quality" */
    /* disabled: "onlysubmitted.~ Average" -> "onlysubmitted. Average" */
    {"Plasmid; ~The F03-1270", "Plasmid; The F03-1270"},
    {"using STS-PCR~from Eb", "using STS-PCR from Eb"},
    {"specific to~the Eb", "specific to the Eb"},
    {"side of insert); , M.F., Lennon", "side of insert); Bonaldo, M.F., Lennon"},
    {"Uni-ZAP XR vector. 1~2 ul of", "Uni-ZAP XR vector. 1~~2 ul of"},
    {"from diploid~Secale montanum", "from diploid Secale montanum"},
    {"homology with~U43516,", "homology with U43516,"},
    /* disabled: "from http//www.biobase.dk/~ddbase" -> "from http//www.biobase.dk/~~ddbase" */
    {"plasmid; ~Assembled EST", "plasmid; Assembled EST"},
    {"databases.~Different cDNA", "databases. Different cDNA"},
    {"enzyme PstI.~DH5-alpha", "enzyme PstI. DH5-alpha"},
    {"as they~were prepared", "as they were prepared"},
    {"loci in~the genome", "loci in the genome"},
    {"P{CaSpeR}Cp1~50C (FBti0004219)", "P{CaSpeR}Cp1~~50C (FBti0004219)"},
    {"seedlings with 2~4 leaves", "seedlings with 2~~4 leaves"},
    {"tween 20 (~50mLs per tree)", "tween 20 (~~50mLs per tree)"}
  };
  size_t  idx;

  for (idx = 0; idx < sizeof (tildeFixes) / sizeof (tildeFixes [0]); idx++) {
    FindReplaceString (str, tildeFixes [idx] [0], tildeFixes [idx] [1], FALSE, FALSE);
  }
#endif
}
5976 
/*
 * FixStrainForPrefix
 *
 * Normalises culture-collection style strain names.  Only acts on an
 * OrgMod whose subtype is ORGMOD_strain and whose subname has text.  The
 * subname is split at semicolons; every piece that starts with "ATCC" or
 * "DSM" (any case), optionally followed by ':' or '/', and then consists
 * solely of digits (after trimming spaces) is rewritten as
 * "<PREFIX> <digits>".  The pieces are then rejoined with "; " and
 * replace omp->subname.
 *
 * The temporary ValNode list produced by SplitStringAtSemicolon is
 * released on every exit path once it exists, and the digit copy is
 * released when the replacement buffer cannot be allocated; both were
 * previously leaked.
 */
static void FixStrainForPrefix (OrgModPtr omp)

{
  Char        ch;
  CharPtr     cpy;
  ValNodePtr  head = NULL;
  size_t      len;
  CharPtr     pfx;
  CharPtr     sfx;
  CharPtr     str;
  CharPtr     tmp;
  ValNodePtr  vnp;

  if (omp == NULL || omp->subtype != ORGMOD_strain) return;
  str = omp->subname;
  if (StringHasNoText (str)) return;

  head = SplitStringAtSemicolon (str);
  if (head == NULL) return;

  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    TrimSpacesAroundString (str);

    pfx = NULL;
    sfx = NULL;
    if (StringNICmp (str, "ATCC", 4) == 0) {
      pfx = "ATCC";
      sfx = str + 4;
    } else if (StringNICmp (str, "DSM", 3) == 0) {
      pfx = "DSM";
      sfx = str + 3;
    }
    if (pfx == NULL || sfx == NULL) continue;

    /* tolerate a single ':' or '/' between prefix and number */
    ch = *sfx;
    if (ch == ':' || ch == '/') {
      sfx++;
    }
    cpy = StringSave (sfx);
    TrimSpacesAroundString(cpy);
    if (! StringIsAllDigits (cpy)) {
      cpy = MemFree (cpy);
      continue;
    }

    /* prefix + ' ' + digits + terminator, with one byte to spare */
    len = StringLen (pfx) + StringLen (cpy) + 3;
    tmp = (CharPtr) MemNew (len);
    if (tmp == NULL) {
      cpy = MemFree (cpy);
      continue;
    }
    StringCpy (tmp, pfx);
    StringCat (tmp, " ");
    StringCat (tmp, cpy);
    vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
    vnp->data.ptrvalue = tmp;
    cpy = MemFree (cpy);
  }

  tmp = ValNodeMergeStrsEx (head, "; ");

  /* the merged string is a fresh allocation, so the pieces can go now */
  ValNodeFreeData (head);

  if (tmp == NULL) return;

  omp->subname = MemFree (omp->subname);
  omp->subname = tmp;
}
6041 
/*
 * CleanOrgModListEx
 *
 * Cleans the OrgMod list whose head pointer is stored at *ompp, editing
 * strings in place and unlinking/freeing nodes.  Works in four stages:
 *
 *   1. normalise subname/attrib text and drop empty or redundant nodes;
 *   2. tidy colons in specimen_voucher / culture_collection / bio_material;
 *   3. per-subtype fix-ups (nat_host "human", strain prefixes) while
 *      remembering the last anamorph, gb_anamorph and other modifiers;
 *   4. drop an "other" modifier of the form "anamorph: X" when X repeats
 *      the anamorph (or, failing that, the gb_anamorph) subname.
 *
 * ompp      - address of the list head; may be updated.  NULL is a no-op.
 * orpcommon - compared against ORGMOD_common subnames (see stage 1); may
 *             be NULL.
 */
static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)

{
  Char            ch;
  OrgModPtr       last = NULL;
  OrgModPtr       next;
  OrgModPtr       omp;
  OrgModPtr       omp_anamorph, omp_gb_anamorph, omp_other;
  OrgModPtr PNTR  prev;
  CharPtr         ptr;
  Boolean         redund;
  CharPtr         str;
  CharPtr         tmp;
  Boolean         unlink;

  if (ompp == NULL) return;

  /* stage 1: prev always addresses the link that points at omp, so a node
     can be removed by overwriting *prev; last is the most recent node that
     was kept */
  prev = ompp;
  omp = *ompp;
  while (omp != NULL) {
    next = omp->next;
    unlink= FALSE;
    CleanVisStringAndCompress (&(omp->subname));
    TrimSpacesAndJunkFromEnds (omp->subname, FALSE);
    RemoveFlankingQuotes (&(omp->subname));
    CleanVisStringAndCompress (&(omp->attrib));
    if (omp->subtype == ORGMOD_other && StringDoesHaveText (omp->subname)) {
      CorrectTildes (&(omp->subname));
    }
    if (omp->subtype == ORGMOD_common && StringICmp (omp->subname, orpcommon) == 0) {
      /* a common name equal to orpcommon is deliberately kept: the removal
         below is disabled, and this branch also bypasses the other checks */
      /*
      unlink = TRUE;
      */
    } else if (last != NULL) {
      if (HasNoText (omp->subname)) {
        unlink = TRUE;
      } else if ((last->subtype == omp->subtype &&
                 StringICmp (last->subname, omp->subname) == 0) ||
                 (last->subtype == omp->subtype &&
                 last->subtype == ORGMOD_other &&
                  StringStr (last->subname, omp->subname) != NULL)) {
        /* same subtype and same text as the previous kept node, or an
           "other" note whose text already appears inside the previous one */
        unlink = TRUE;
      } else if (last->subtype == omp->subtype &&
                 last->subtype == ORGMOD_other &&
                 IsSubString (last->subname, omp->subname)) {
        /* previous "other" note is a word-boundary prefix of this one:
           keep the longer text on the previous node and drop this node */
        last->subname = MemFree (last->subname);
        last->subname = omp->subname;
        omp->subname = NULL;
        unlink = TRUE;
      }
    } else if (HasNoText (omp->subname) ||
               StringCmp (omp->subname, ")") == 0 ||
               StringCmp (omp->subname, "(") == 0) {
      /* nothing kept yet: drop empty nodes and lone parentheses */
      unlink = TRUE;
    } else {
      last = omp;
    }
    if (unlink) {
      *prev = omp->next;
      omp->next = NULL;
      OrgModFree (omp);
    } else {
      last = omp;
      prev = &(omp->next);
    }
    omp = next;
  }


  /* stage 2: for the three structured-voucher subtypes, remove spaces
     around colons and collapse the first "::" into a single ':' */
  for (omp = *ompp; omp != NULL; omp = omp->next) {
    if (omp->subtype != ORGMOD_specimen_voucher &&
        omp->subtype != ORGMOD_culture_collection &&
        omp->subtype != ORGMOD_bio_material) continue;
    if (StringHasNoText (omp->subname)) continue;
    RemoveSpaceBeforeAndAfterColon (omp->subname);
    ptr = StringStr (omp->subname, "::");
    if (ptr == NULL) continue;
    /* ptr -> second colon, tmp -> character after it; shift the tail left
       by one to overwrite the second colon */
    ptr++;
    tmp = ptr;
    tmp++;
    ch = *tmp;
    while (ch != '\0') {
      *ptr = ch;
      ptr++;
      tmp++;
      ch = *tmp;
    }
    *ptr = '\0';
  }

  omp_anamorph = NULL;
  omp_gb_anamorph = NULL;
  omp_other = NULL;
  redund = FALSE;

  /* stage 3: each tracking pointer ends up on the LAST node of its subtype */
  for (omp = *ompp; omp != NULL; omp = omp->next) {
    if (omp->subtype == ORGMOD_anamorph) {
      omp_anamorph = omp;
    } else if (omp->subtype == ORGMOD_gb_anamorph) {
      omp_gb_anamorph = omp;
    } else if (omp->subtype == ORGMOD_other) {
      omp_other = omp;
    } else if (omp->subtype == ORGMOD_nat_host) {
      if (StringICmp (omp->subname, "human") == 0) {
        omp->subname = MemFree (omp->subname);
        omp->subname = StringSave ("Homo sapiens");
      }
    } else if (omp->subtype == ORGMOD_strain) {
      FixStrainForPrefix (omp);
    }
  }

  /* stage 4: is the last "other" note just "anamorph: <name>" repeating an
     existing anamorph?  The comparison of the name itself is case-sensitive,
     and gb_anamorph is consulted only when there is no anamorph node */
  if (omp_other != NULL && StringNICmp (omp_other->subname, "anamorph:", 9) == 0) {
    ptr = omp_other->subname + 9;
    ch = *ptr;
    while (ch == ' ') {
      ptr++;
      ch = *ptr;
    }
    if (omp_anamorph != NULL) {
      str = omp_anamorph->subname;
      if (StringCmp (ptr, str) == 0) {
        redund = TRUE;
      }
    } else if (omp_gb_anamorph != NULL) {
      str = omp_gb_anamorph->subname;
      if (StringCmp (ptr, str) == 0) {
        redund = TRUE;
      }
    }
  }
  if (redund) {
    /* unlink and free exactly the omp_other node */
    prev = ompp;
    omp = *ompp;
    while (omp != NULL) {
      next = omp->next;
      unlink= FALSE;
      if (omp == omp_other) {
        unlink= TRUE;
      }
      if (unlink) {
        *prev = omp->next;
        omp->next = NULL;
        OrgModFree (omp);
      } else {
        prev = &(omp->next);
      }
      omp = next;
    }
  }
}
6191 
/*
 * CleanOrgModList
 *
 * Exported entry point: cleans the OrgMod list at *ompp by calling
 * CleanOrgModListEx with no common name (orpcommon == NULL).
 */
NLM_EXTERN void CleanOrgModList (OrgModPtr PNTR ompp)

{
  CleanOrgModListEx (ompp, NULL);
}
6197 
IsNoNameSubSource(SubSourcePtr ssp)6198 static Boolean IsNoNameSubSource (SubSourcePtr ssp)
6199 
6200 {
6201   if (ssp == NULL) return FALSE;
6202 
6203   return (Boolean) (ssp->subtype == SUBSRC_germline ||
6204                     ssp->subtype == SUBSRC_rearranged ||
6205                     ssp->subtype == SUBSRC_transgenic ||
6206                     ssp->subtype == SUBSRC_environmental_sample ||
6207                     ssp->subtype == SUBSRC_metagenomic);
6208 }
6209 
SortBySubSourceSubtype(VoidPtr ptr1,VoidPtr ptr2)6210 static int LIBCALLBACK SortBySubSourceSubtype (VoidPtr ptr1, VoidPtr ptr2)
6211 
6212 {
6213   int           compare;
6214   SubSourcePtr  ssp1;
6215   SubSourcePtr  ssp2;
6216   CharPtr       str1;
6217   CharPtr       str2;
6218 
6219   if (ptr1 == NULL || ptr2 == NULL) return 0;
6220   ssp1 = *((SubSourcePtr PNTR) ptr1);
6221   ssp2 = *((SubSourcePtr PNTR) ptr2);
6222   if (ssp1 == NULL || ssp2 == NULL) return 0;
6223   if (ssp1->subtype > ssp2->subtype) {
6224     return 1;
6225   } else if (ssp1->subtype < ssp2->subtype) {
6226     return -1;
6227   }
6228   if (IsNoNameSubSource (ssp1)) return 0;
6229   str1 = (CharPtr) ssp1->name;
6230   str2 = (CharPtr) ssp2->name;
6231   if (str1 == NULL || str2 == NULL) return 0;
6232   compare = StringICmp (str1, str2);
6233   return compare;
6234 }
6235 
SubSourceAlreadyInOrder(SubSourcePtr list)6236 static Boolean SubSourceAlreadyInOrder (SubSourcePtr list)
6237 
6238 {
6239   int           compare;
6240   SubSourcePtr  curr;
6241   SubSourcePtr  next;
6242   CharPtr       str1;
6243   CharPtr       str2;
6244 
6245   if (list == NULL || list->next == NULL) return TRUE;
6246   curr = list;
6247   next = curr->next;
6248   while (next != NULL) {
6249     if (curr->subtype > next->subtype) return FALSE;
6250     if (curr->subtype == next->subtype) {
6251       if (! IsNoNameSubSource (curr)) {
6252         str1 = (CharPtr) curr->name;
6253         str2 = (CharPtr) next->name;
6254         compare = StringICmp (str1, str2);
6255         if (compare > 0) return FALSE;
6256       }
6257     }
6258     curr = next;
6259     next = curr->next;
6260   }
6261   return TRUE;
6262 }
6263 
/*
 * SortSubSourceList
 *
 * Returns the SubSource list reordered by SortBySubSourceSubtype (stable
 * merge sort).  A list that SubSourceAlreadyInOrder accepts is returned
 * untouched.  Nodes are relinked, not copied.
 *
 * If the temporary pointer array cannot be allocated the list is returned
 * in its original order instead of dereferencing a NULL pointer.
 */
static SubSourcePtr SortSubSourceList (SubSourcePtr list)

{
  size_t        count, i;
  SubSourcePtr  ssp, PNTR head;

  if (list == NULL) return NULL;
  if (SubSourceAlreadyInOrder (list)) return list;

  for (ssp = list, count = 0; ssp != NULL; ssp = ssp->next, count++) continue;

  /* one extra slot: the relinking loop reads head [count] as the final next
     pointer, which relies on MemNew handing back zero-filled memory */
  head = MemNew (sizeof (SubSourcePtr) * (count + 1));
  if (head == NULL) return list;

  for (ssp = list, i = 0; ssp != NULL && i < count; i++) {
    head [i] = ssp;
    ssp = ssp->next;
  }

  StableMergeSort (head, count, sizeof (SubSourcePtr), SortBySubSourceSubtype);

  /* rebuild the chain in sorted order */
  for (i = 0; i < count; i++) {
    ssp = head [i];
    ssp->next = head [i + 1];
  }

  list = head [0];
  MemFree (head);

  return list;
}
6293 
6294 //LCOV_EXCL_START
/*
 * TrimParenthesesAndCommasAroundString
 *
 * In-place trim.  From the front it removes every leading character that
 * is a control character (below ' '), '(' or ','; from the back it removes
 * the trailing run of ')' and ',' characters.  Returns str itself (NULL
 * and empty strings are returned unchanged).
 */
static CharPtr TrimParenthesesAndCommasAroundString (CharPtr str)

{
  Uchar    ch;    /* unsigned so that 8-bit characters are not seen as < ' ' */
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return str;

  /* find the first character that survives the leading trim */
  rd = str;
  ch = *rd;
  while (ch != '\0' && (ch < ' ' || ch == '(' || ch == ',')) {
    rd++;
    ch = *rd;
  }

  /* slide the remainder down to the start of the buffer */
  wr = str;
  while (ch != '\0') {
    *wr = ch;
    wr++;
    rd++;
    ch = *rd;
  }
  *wr = '\0';

  /* wr sits on the terminator: back up over the trailing ')' / ',' run */
  while (wr > str && (wr [-1] == ')' || wr [-1] == ',')) {
    wr--;
  }
  *wr = '\0';

  return str;
}
6335 
/*
 * CombineSplitQual
 *
 * Merges two qualifier values into "(orig,new)".  If newval already occurs
 * inside origval, or the result buffer cannot be allocated, origval is
 * returned unchanged.  Otherwise both inputs are trimmed in place with
 * TrimParenthesesAndCommasAroundString, origval is freed, and the newly
 * allocated combined string is returned; the caller is expected to store
 * the return value in place of origval.
 */
static CharPtr CombineSplitQual (CharPtr origval, CharPtr newval)

{
  CharPtr  merged = NULL;
  size_t   needed;

  if (StringStr (origval, newval) != NULL) return origval;

  /* room for both values plus "(", ",", ")" and the terminator */
  needed = StringLen (origval) + StringLen (newval) + 5;
  merged = MemNew (sizeof (Char) * needed);
  if (merged == NULL) return origval;

  origval = TrimParenthesesAndCommasAroundString (origval);
  newval = TrimParenthesesAndCommasAroundString (newval);

  StringCpy (merged, "(");
  StringCat (merged, origval);
  StringCat (merged, ",");
  StringCat (merged, newval);
  StringCat (merged, ")");

  /* the combined string supersedes the original allocation */
  MemFree (origval);
  return merged;
}
6357 //LCOV_EXCL_STOP
6358 
LocationForPlastidText(CharPtr plastid_name)6359 static Uint1 LocationForPlastidText (CharPtr plastid_name)
6360 {
6361   if (StringICmp (plastid_name, "chloroplast") == 0) {
6362     return GENOME_chloroplast;
6363   } else if (StringICmp (plastid_name, "chromoplast") == 0) {
6364     return GENOME_chromoplast;
6365   } else if (StringICmp (plastid_name, "kinetoplast") == 0) {
6366     return GENOME_kinetoplast;
6367   } else if (StringICmp (plastid_name, "plastid") == 0) {
6368     return GENOME_plastid;
6369   } else if (StringICmp (plastid_name, "apicoplast") == 0) {
6370     return GENOME_apicoplast;
6371   } else if (StringICmp (plastid_name, "leucoplast") == 0) {
6372     return GENOME_leucoplast;
6373   } else if (StringICmp (plastid_name, "proplastid") == 0) {
6374     return GENOME_proplastid;
6375   } else if (StringICmp (plastid_name, "chromatophore") == 0) {
6376     return GENOME_chromatophore;
6377   } else {
6378     return 0;
6379   }
6380 }
6381 
6382 //LCOV_EXCL_START
StringToLower(CharPtr str)6383 NLM_EXTERN void StringToLower (CharPtr str)
6384 
6385 {
6386   Char  ch;
6387 
6388   if (str == NULL) return;
6389   ch = *str;
6390   while (ch != '\0') {
6391     *str = TO_LOWER (ch);
6392     str++;
6393     ch = *str;
6394   }
6395 }
6396 //LCOV_EXCL_STOP
6397 
6398 
/*
 * CleanPCRPrimerSeq
 *
 * Normalises a PCR primer sequence in place:
 *   1. every upper-case letter is converted to lower case;
 *   2. blanks are removed, except those lying between '<' and '>'
 *      (modified-base names);
 *   3. each "<other>" tag is rewritten as "<OTHER>".
 * Strings without text are left untouched.
 */
static void CleanPCRPrimerSeq (CharPtr seq)
{
  CharPtr  rd, wr, hit;
  Boolean  inside = FALSE;
  Int4     k;

  if (StringHasNoText (seq)) {
    return;
  }

  /* step 1: lower-case the whole sequence */
  for (rd = seq; *rd != '\0'; rd++) {
    if (IS_UPPER (*rd)) {
      *rd = TO_LOWER (*rd);
    }
  }

  /* step 2: compact the string, keeping blanks only inside <...> */
  wr = seq;
  for (rd = seq; *rd != '\0'; rd++) {
    if (*rd == '<') {
      inside = TRUE;
    } else if (*rd == '>') {
      inside = FALSE;
    }
    if (*rd != ' ' || inside) {
      *wr = *rd;
      wr++;
    }
  }
  *wr = '\0';

  /* step 3: capitalise the five letters between the brackets of <other> */
  for (hit = StringStr (seq, "<other>"); hit != NULL; hit = StringStr (hit + 7, "<other>")) {
    for (k = 1; k < 6; k++) {
      hit [k] = TO_UPPER (hit [k]);
    }
  }
}
6456 
6457 
/*
 * CleanupPCRPrimers
 *
 * Cleans the PCRPrimer list whose head pointer is stored at *pppp:
 *   pass 1 - normalise the seq and name strings of every primer;
 *   pass 2 - blank out later primers that duplicate an earlier one, and
 *            merge a sequence onto an earlier same-named primer that has
 *            none;
 *   pass 3 - re-clean, then unlink and free primers left with neither
 *            seq nor name;
 *   finally - if exactly two primers remain, one holding only a name and
 *            the other only a sequence, fold them into a single primer.
 * A NULL pppp is a no-op.
 *
 * NOTE(review): CleanPCRPrimerSeq upper-cases "<other>" to "<OTHER>", and
 * the StringToLower call that follows it lower-cases the whole sequence
 * again - confirm whether undoing that capitalisation is intended.
 */
static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp)

{
  PCRPrimerPtr       next;
  PCRPrimerPtr PNTR  prev;
  PCRPrimerPtr       ppp;
  PCRPrimerPtr       pr1, pr2;

  if (pppp == NULL) return;

  /* pass 1: string normalisation only, no nodes are removed */
  ppp = *pppp;
  while (ppp != NULL) {
    CleanVisString (&(ppp->seq));
    CleanPCRPrimerSeq (ppp->seq);
    CleanVisString (&(ppp->name));
    Asn2gnbkCompressSpaces (ppp->name);
    StringToLower (ppp->seq);

    ppp = ppp->next;
  }

  /* pass 2: compare every primer with each later one */
  ppp = *pppp;
  for (pr1 = ppp; pr1 != NULL; pr1 = pr1->next)  {
    for (pr2 = pr1->next; pr2 != NULL; pr2 = pr2->next) {
      if (StringCmp (pr1->seq, pr2->seq) == 0 && StringCmp (pr1->name, pr2->name) == 0) {
        /* exact duplicate: empty the later one so pass 3 removes it */
        pr2->seq = MemFree (pr2->seq);
        pr2->name = MemFree (pr2->name);
      } else if (StringCmp (pr1->name, pr2->name) == 0) {
        if (StringHasNoText (pr1->seq)) {
          /* same name, earlier one lacks a sequence: take the later one's
             (pr2 keeps its name here) */
          pr1->seq = MemFree (pr1->seq);
          pr1->seq = pr2->seq;
          pr2->seq = NULL;
        } else if (StringHasNoText (pr2->seq)) {
          /* same name, later one adds nothing: empty it */
          pr2->seq = MemFree (pr2->seq);
          pr2->name = MemFree (pr2->name);
        }
      }
    }
  }

  /* pass 3: prev addresses the link pointing at ppp so that emptied
     primers can be unlinked in place */
  prev = pppp;
  ppp = *pppp;
  while (ppp != NULL) {
    next = ppp->next;

    CleanVisString (&(ppp->seq));
    CleanPCRPrimerSeq (ppp->seq);
    CleanVisString (&(ppp->name));

    if (ppp->seq == NULL && ppp->name == NULL) {
      *prev = next;
      ppp->next = NULL;
      PCRPrimerFree (ppp);
    } else {
      StringToLower (ppp->seq);
      prev = &(ppp->next);
    }

    ppp = next;
  }

  /* fix artifact caused by fwd/rev-primer-seq starting with colon, separating name and seq */

  /* only applies when the list now holds exactly two primers */
  ppp = *pppp;
  if (ppp == NULL) return;
  next = ppp->next;
  if (next == NULL) return;
  if (next->next != NULL) return;

  if (ppp->name != NULL && ppp->seq == NULL && next->name == NULL && next->seq != NULL) {
    /* first has the name, second has the sequence */
    ppp->seq = next->seq;
    next->seq = NULL;
    ppp->next = NULL;
    PCRPrimerFree (next);
  } else if (ppp->seq != NULL && ppp->name == NULL && next->seq == NULL && next->name != NULL) {
    /* first has the sequence, second has the name */
    ppp->name = next->name;
    next->name = NULL;
    ppp->next = NULL;
    PCRPrimerFree (next);
  }
}
6539 
PCRPrimersMatch(PCRPrimerPtr ppp1,PCRPrimerPtr ppp2)6540 static Boolean PCRPrimersMatch (PCRPrimerPtr ppp1, PCRPrimerPtr ppp2)
6541 
6542 {
6543   Int2          len1 = 0, len2 = 0, matches = 0;
6544   PCRPrimerPtr  pr1, pr2;
6545 
6546   if (ppp1 == NULL || ppp2 == NULL) return FALSE;
6547 
6548   for (pr1 = ppp1; pr1 != NULL; pr1 = pr1->next) {
6549     len1++;
6550   }
6551   for (pr2 = ppp2; pr2 != NULL; pr2 = pr2->next) {
6552     len2++;
6553   }
6554   if (len1 != len2) return FALSE;
6555 
6556   for (pr1 = ppp1; pr1 != NULL; pr1 = pr1->next) {
6557     for (pr2 = ppp2; pr2 != NULL; pr2 = pr2->next) {
6558       if (StringCmp (pr1->seq, pr2->seq) == 0 && StringCmp (pr1->name, pr2->name) == 0) {
6559         matches++;
6560       }
6561     }
6562   }
6563 
6564   if (matches == len1) return TRUE;
6565 
6566   return FALSE;
6567 }
6568 
PCRReactionSetsMatch(PCRReactionSetPtr prp1,PCRReactionSetPtr prp2)6569 static Boolean PCRReactionSetsMatch (PCRReactionSetPtr prp1, PCRReactionSetPtr prp2)
6570 
6571 {
6572   if (prp1 == NULL || prp2 == NULL) return FALSE;
6573 
6574   if (! PCRPrimersMatch (prp1->forward, prp2->forward)) return FALSE;
6575   if (! PCRPrimersMatch (prp1->reverse, prp2->reverse)) return FALSE;
6576 
6577   return TRUE;
6578 }
6579 
/*
 * CleanupPCRReactionSet
 *
 * Cleans the PCR reaction list whose head pointer is stored at *prpp.
 * First every reaction has its forward and reverse primer lists cleaned
 * with CleanupPCRPrimers.  Then, walking the list front to back, every
 * later reaction that PCRReactionSetsMatch equates with the current one
 * has its primers freed, and any reaction left with neither forward nor
 * reverse primers is unlinked and freed.  A NULL prpp is a no-op.
 */
static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp)

{
  PCRReactionSetPtr       dup;
  PCRReactionSetPtr       next;
  PCRReactionSetPtr PNTR  link;
  PCRReactionSetPtr       prp;

  if (prpp == NULL) return;

  for (prp = *prpp; prp != NULL; prp = prp->next) {
    CleanupPCRPrimers (&(prp->forward));
    CleanupPCRPrimers (&(prp->reverse));
  }

  /* link addresses the pointer that currently leads to prp */
  link = prpp;
  for (prp = *prpp; prp != NULL; prp = next) {
    next = prp->next;

    /* strip the primers from later copies of this reaction */
    for (dup = next; dup != NULL; dup = dup->next) {
      if (! PCRReactionSetsMatch (prp, dup)) continue;
      dup->forward = PCRPrimerFree (dup->forward);
      dup->reverse = PCRPrimerFree (dup->reverse);
    }

    if (prp->forward != NULL || prp->reverse != NULL) {
      link = &(prp->next);
      continue;
    }

    /* nothing left in this reaction: take it out of the list */
    *link = next;
    prp->next = NULL;
    PCRReactionFree (prp);
  }

}
6623 
/*
 * CleanupAltitude
 *
 * Rewrites ssp->name as "<number> m" when it consists of an optionally
 * signed decimal number directly followed by one of the unit spellings
 * "m", "m.", " m", " meters" or " metres".  A trailing period is stripped
 * first (for names longer than two characters), and that stripping is not
 * undone if the rest of the text does not qualify.  Names that do not
 * start with a number, or that carry any other suffix, are otherwise left
 * alone.
 */
static void CleanupAltitude (SubSourcePtr ssp)

{
  static CharPtr  unitForms [] = {"m", "m.", " m", " meters", " metres"};
  CharPtr         cursor;
  CharPtr         fixed;
  Boolean         known = FALSE;
  size_t          idx;
  size_t          len;
  CharPtr         name;

  if (ssp == NULL) return;
  name = ssp->name;
  if (StringHasNoText (name)) return;
  len = StringLen (name);
  if (len < 1) return;

  /* drop a trailing period */
  if (len > 2 && name [len - 1] == '.') {
    name [len - 1] = '\0';
  }

  /* optional sign */
  cursor = name;
  if (*cursor == '+' || *cursor == '-') {
    cursor++;
  }

  /* integer part: at least one digit */
  if (! IS_DIGIT (*cursor)) return;
  while (IS_DIGIT (*cursor)) {
    cursor++;
  }

  /* optional fraction: a point must be followed by at least one digit */
  if (*cursor == '.') {
    cursor++;
    if (! IS_DIGIT (*cursor)) return;
    while (IS_DIGIT (*cursor)) {
      cursor++;
    }
  }

  /* whatever follows the number must be exactly one of the unit spellings */
  for (idx = 0; idx < sizeof (unitForms) / sizeof (unitForms [0]) && ! known; idx++) {
    if (StringCmp (cursor, unitForms [idx]) == 0) {
      known = TRUE;
    }
  }
  if (! known) return;

  /* cut the name after the number and rebuild it with the canonical unit */
  *cursor = '\0';
  fixed = (CharPtr) MemNew (len + 5);
  if (fixed == NULL) return;
  StringCpy (fixed, ssp->name);
  StringCat (fixed, " m");
  ssp->name = MemFree (ssp->name);
  ssp->name = fixed;
}
6682 
/* Canonical capitalisation of the twelve month abbreviations, each wrapped
   in hyphens (five characters per entry); used by CorrectMonthCapitalization
   both as search pattern and as replacement text. */
static CharPtr coll_date_month_abbrevs [12] =
{
  "-Jan-", "-Feb-", "-Mar-", "-Apr-", "-May-", "-Jun-",
  "-Jul-", "-Aug-", "-Sep-", "-Oct-", "-Nov-", "-Dec-"
};
6688 
/*
 * CorrectMonthCapitalization
 *
 * Looks in str for a hyphen-delimited month abbreviation from
 * coll_date_month_abbrevs, ignoring case, and overwrites the match in
 * place with the canonical spelling (e.g. "-jAN-" becomes "-Jan-").
 * Months are tried in calendar order and only the first one that is found
 * is corrected; the function then returns.
 */
static void CorrectMonthCapitalization (CharPtr str)

{
  CharPtr  canon;
  CharPtr  found;
  Int2     k;
  Int2     which;

  for (which = 0; which < 12; which++) {
    canon = coll_date_month_abbrevs [which];
    found = StringISearch (str, canon);
    if (found != NULL) {
      /* copy the five pattern characters over the match */
      for (k = 0; k < 5; k++) {
        found [k] = canon [k];
      }
      return;
    }
  }
}
6707 
/* A pair of strings describing one text substitution: the text to look
   for (from) and the text that replaces it (to). */
typedef struct stringpair {
  CharPtr  from;
  CharPtr  to;
} StringPair, PNTR StringPairPtr;
6712 
/* Conversion table for sex descriptions: each entry maps an abbreviated or
   slash/comma-separated spelling (from) to a spelled-out phrase (to).  All
   "from" strings are lower case.  The table ends with a { NULL, NULL }
   sentinel.
   NOTE(review): presumably consulted by the SUBSRC_sex handling in
   CleanSubSourceList - confirm against the code that reads it. */
static StringPair sex_conv[] = {
  { "asexual female",        "asexual and female"       },
  { "asexual male",          "asexual and male"         },
  { "dioecious female",      "dioecious and female"     },
  { "dioecious male",        "dioecious and male"       },
  { "f and m mixed",         "female, male, and mixed"  },
  { "f",                     "female"                   },
  { "f/m",                   "female and male"          },
  { "female,male",           "female and male"          },
  { "female/hermaphrodite",  "female and hermaphrodite" },
  { "female/male mixed",     "female, male, and mixed"  },
  { "female/male",           "female and male"          },
  { "m and f mixed",         "male, female, and mixed"  },
  { "m",                     "male"                     },
  { "m/f",                   "male and female"          },
  { "male,female",           "male and female"          },
  { "male/female mixed",     "male, female, and mixed"  },
  { "male/female",           "male and female"          },
  { "male/hermaphrodite",    "male and hermaphrodite"   },
  { "mixed female and male", "mixed, female, and male"  },
  { "mixed female/male",     "mixed, female, and male"  },
  { "mixed male and female", "mixed, male, and female"  },
  { "mixed male/female",     "mixed, male, and female"  },
  { NULL,                    NULL                       }
};
6738 
/*
 * CleanSubSourceList
 *
 * Walks the SubSource list headed by *sspp, normalizes the text of every
 * entry in place and unlinks (and frees) entries that end up empty or
 * redundant.
 *
 *   sspp      address of the list head; updated when leading entries are
 *             removed.  Nothing happens if it is NULL.
 *   location  compared against LocationForPlastidText (name) for
 *             SUBSRC_plastid_name entries; when it is nonzero and the two
 *             agree, the plastid_name entry is removed.
 *
 * Per-subtype handling visible here:
 *   - entries for which IsNoNameSubSource is TRUE get an empty name
 *   - country: trailing ':' removed, "United States" spellings become "USA"
 *   - clone: CleanVisStringJunk
 *   - altitude: CleanupAltitude when AltitudeIsValid fails
 *   - lat_lon: left untouched (earlier normalization attempts are disabled)
 *   - other: CorrectTildes
 *   - sex: lower-cased, then mapped through the sex_conv table
 *   - collection_date: CorrectMonthCapitalization
 *   - fwd/rev primer sequences: lower-cased, spaces outside <...> removed,
 *     "<other>" rewritten as "<OTHER>"
 *
 * Merging of split primer qualifiers is not done here; see
 * CleanSubSourcePrimers.
 */
extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)

{
  Char               ch;
  CharPtr            dst;
  Int2               i;
  Boolean            in_brackets;
  SubSourcePtr       last = NULL;   /* most recently retained entry */
  size_t             len;
  SubSourcePtr       next;
  SubSourcePtr PNTR  prev;          /* link that currently points at ssp */
  CharPtr            ptr;
  CharPtr            src;
  SubSourcePtr       ssp;
  CharPtr            str;
  CharPtr            tmp;
  Boolean            unlink;

  if (sspp == NULL) return;
  prev = sspp;
  ssp = *sspp;
  while (ssp != NULL) {
    next = ssp->next;
    unlink = FALSE;

    /* generic cleanup of the value */
    if (! IsNoNameSubSource (ssp)) {
      CleanVisStringAndCompress (&(ssp->name));
      TrimSpacesAndJunkFromEnds (ssp->name, FALSE);
      RemoveFlankingQuotes (&(ssp->name));
    } else {
      ssp->name = MemFree (ssp->name);
      ssp->name = StringSave ("");
    }

    /* subtype-specific cleanup */
    if (ssp->subtype == SUBSRC_country) {
      CleanVisStringJunk (&(ssp->name));
      len = StringLen (ssp->name);
      if (len > 2) {
        str = ssp->name;
        if (str [len - 1] == ':') {
          str [len - 1] = '\0';
        }
      }
      if (StringICmp (ssp->name, "United States") == 0 ||
          StringICmp (ssp->name, "United States of America") == 0 ||
          StringICmp (ssp->name, "U.S.A.") == 0) {
        ssp->name = MemFree (ssp->name);
        ssp->name = StringSave ("USA");
      }
      if (StringNICmp (ssp->name, "United States:", 14) == 0) {
        /* overwrite "United States" (13 chars) with ten blanks followed by
           "USA", keep the ':' at index 13, then trim the leading blanks */
        str = ssp->name;
        str [0] = ' ';
        str [1] = ' ';
        str [2] = ' ';
        str [3] = ' ';
        str [4] = ' ';
        str [5] = ' ';
        str [6] = ' ';
        str [7] = ' ';
        str [8] = ' ';
        str [9] = ' ';
        str [10] = 'U';
        str [11] = 'S';
        str [12] = 'A';
        TrimSpacesAroundString (ssp->name);
      }
    } else if (ssp->subtype == SUBSRC_clone) {
      CleanVisStringJunk (&(ssp->name));
    } else if (ssp->subtype == SUBSRC_altitude) {
      if (ssp->name != NULL && (! AltitudeIsValid (ssp->name))) {
        CleanupAltitude (ssp);
      }
    } else if (ssp->subtype == SUBSRC_lat_lon) {
      /* intentionally no change to lat_lon values */
    } else if (ssp->subtype == SUBSRC_other && StringDoesHaveText (ssp->name)) {
      CorrectTildes (&(ssp->name));
    } else if (ssp->subtype == SUBSRC_sex) {
      ptr = ssp->name;
      if (StringDoesHaveText (ptr)) {
        /* lower case the whole value before the table lookup */
        ch = *ptr;
        while (ch != '\0') {
          ch = TO_LOWER(ch);
          *ptr = ch;
          ptr++;
          ch = *ptr;
        }
        ptr = ssp->name;
        for (i = 0; sex_conv[i].from != NULL; i++) {
          if (StringCmp (ptr, sex_conv[i].from) == 0) {
            ssp->name = MemFree (ssp->name);
            ssp->name = StringSave (sex_conv[i].to);
            break;
          }
        }
      }
    } else if (ssp->subtype == SUBSRC_collection_date) {
      ptr = ssp->name;
      if (StringDoesHaveText (ptr)) {
        CorrectMonthCapitalization (ptr);
      }
    }

    if (ssp->subtype == SUBSRC_fwd_primer_seq ||
        ssp->subtype == SUBSRC_rev_primer_seq) {
      if (ssp->name != NULL) {
        /* lower case sequence */
        ptr = ssp->name;
        ch = *ptr;
        while (ch != '\0') {
          if (IS_UPPER (ch)) {
            *ptr = TO_LOWER (ch);
          }
          ptr++;
          ch = *ptr;
        }
        /* remove any spaces in sequence outside of <modified base>;
           the bracket state starts fresh for every primer so that an
           unbalanced '<' in one value cannot affect the next one */
        in_brackets = FALSE;
        src = ssp->name;
        dst = ssp->name;
        ch = *src;
        while (ch != '\0') {
          if (ch == '<') {
            in_brackets = TRUE;
            *dst = ch;
            dst++;
          } else if (ch == '>') {
            in_brackets = FALSE;
            *dst = ch;
            dst++;
          } else if (ch != ' ') {
            *dst = ch;
            dst++;
          } else if (in_brackets) {
            *dst = ch;
            dst++;
          }
          src++;
          ch = *src;
        }
        *dst = '\0';
        /* upper case modified base <OTHER> (characters 1..5 of the match) */
        ptr = ssp->name;
        tmp = StringStr (ptr, "<other>");
        while (tmp != NULL) {
          ptr = tmp + 7;
          for (i = 1; i < 6; i++) {
            ch = tmp [i];
            tmp [i] = TO_UPPER (ch);
          }
          tmp = StringStr (ptr, "<other>");
        }
      }
    }

    CleanVisString (&(ssp->attrib));

    /* decide whether this entry is empty or redundant; comparisons are
       made only against the most recently retained entry */
    if (last != NULL) {
      if (HasNoText (ssp->name) && (! IsNoNameSubSource (ssp))) {
        unlink = TRUE;
      } else if (last->subtype == ssp->subtype &&
                 (IsNoNameSubSource (ssp) ||
                  StringICmp (last->name, ssp->name) == 0 ||
                  (last->subtype == SUBSRC_other &&
                   StringStr (last->name, ssp->name) != NULL))) {
        /* same subtype and same value, or an "other" note whose text
           already occurs inside the previous note */
        unlink = TRUE;
      } else if (last->subtype == ssp->subtype &&
                 last->subtype == SUBSRC_other &&
                 IsSubString (last->name, ssp->name)) {
        /* the previous note takes over this entry's text and this entry
           is dropped (presumably the previous text is contained in this
           one - confirm against IsSubString) */
        last->name = MemFree (last->name);
        last->name = ssp->name;
        ssp->name = NULL;
        unlink = TRUE;
      } else if (ssp->subtype == SUBSRC_plastid_name &&
                 location != 0
                 && location == LocationForPlastidText (ssp->name)) {
        unlink = TRUE;
      }
    } else if (HasNoText (ssp->name) && (! IsNoNameSubSource (ssp))) {
      unlink = TRUE;
    } else if (ssp->subtype == SUBSRC_plastid_name &&
               location != 0
               && location == LocationForPlastidText (ssp->name)) {
      unlink = TRUE;
    } else {
      last = ssp;
    }

    if (unlink) {
      *prev = ssp->next;
      ssp->next = NULL;
      SubSourceFree (ssp);
    } else {
      last = ssp;
      prev = &(ssp->next);
    }
    ssp = next;
  }
}
7065 
7066 //LCOV_EXCL_START
CleanSubSourcePrimers(SubSourcePtr PNTR sspp)7067 extern void CleanSubSourcePrimers (SubSourcePtr PNTR sspp)
7068 
7069 {
7070   SubSourcePtr       fwd_seq = NULL, rev_seq = NULL, fwd_name = NULL, rev_name = NULL;
7071   size_t             len;
7072   SubSourcePtr       next;
7073   SubSourcePtr PNTR  prev;
7074   CharPtr            ptr;
7075   SubSourcePtr       ssp;
7076   CharPtr            str;
7077   Boolean            unlink;
7078 
7079   if (sspp == NULL) return;
7080   prev = sspp;
7081   ssp = *sspp;
7082   while (ssp != NULL) {
7083     next = ssp->next;
7084     unlink= FALSE;
7085     if (ssp->subtype == SUBSRC_fwd_primer_seq) {
7086       if (fwd_seq == NULL) {
7087         fwd_seq = ssp;
7088       } else {
7089         fwd_seq->name = CombineSplitQual (fwd_seq->name, ssp->name);
7090         unlink = TRUE;
7091       }
7092     }
7093     if (ssp->subtype == SUBSRC_rev_primer_seq) {
7094       if (rev_seq == NULL) {
7095         rev_seq = ssp;
7096       } else {
7097         rev_seq->name = CombineSplitQual (rev_seq->name, ssp->name);
7098         unlink = TRUE;
7099       }
7100     }
7101     if (ssp->subtype == SUBSRC_fwd_primer_name) {
7102       if (fwd_name == NULL) {
7103         fwd_name = ssp;
7104       } else {
7105         fwd_name->name = CombineSplitQual (fwd_name->name, ssp->name);
7106         unlink = TRUE;
7107       }
7108     }
7109     if (ssp->subtype == SUBSRC_rev_primer_name) {
7110       if (rev_name == NULL) {
7111         rev_name = ssp;
7112       } else {
7113         rev_name->name = CombineSplitQual (rev_name->name, ssp->name);
7114         unlink = TRUE;
7115       }
7116     }
7117     if (unlink) {
7118       *prev = ssp->next;
7119       ssp->next = NULL;
7120       SubSourceFree (ssp);
7121     } else {
7122       prev = &(ssp->next);
7123     }
7124     ssp = next;
7125   }
7126   if (fwd_seq != NULL) {
7127     if (StringChr (fwd_seq->name, ',') != NULL) {
7128       ptr = fwd_seq->name;
7129       len = StringLen (ptr);
7130       if (ptr [0] != '(' || ptr [len - 1] != ')') {
7131         TrimParenthesesAndCommasAroundString (fwd_seq->name);
7132         str = MemNew (sizeof (Char) * (len + 4));
7133         if (str != NULL) {
7134           StringCpy (str, "(");
7135           StringCat (str, fwd_seq->name);
7136           StringCat (str, ")");
7137           fwd_seq->name = MemFree (fwd_seq->name);
7138           fwd_seq->name = str;
7139         }
7140       }
7141     }
7142   }
7143   if (rev_seq != NULL) {
7144     if (StringChr (rev_seq->name, ',') != NULL) {
7145       ptr = rev_seq->name;
7146       len = StringLen (ptr);
7147       if (ptr [0] != '(' || ptr [len - 1] != ')') {
7148         TrimParenthesesAndCommasAroundString (rev_seq->name);
7149         str = MemNew (sizeof (Char) * (len + 4));
7150         if (str != NULL) {
7151           StringCpy (str, "(");
7152           StringCat (str, rev_seq->name);
7153           StringCat (str, ")");
7154           rev_seq->name = MemFree (rev_seq->name);
7155           rev_seq->name = str;
7156         }
7157       }
7158     }
7159   }
7160   if (fwd_name != NULL) {
7161     if (StringChr (fwd_name->name, ',') != NULL) {
7162       ptr = fwd_name->name;
7163       len = StringLen (ptr);
7164       if (ptr [0] != '(' || ptr [len - 1] != ')') {
7165         TrimParenthesesAndCommasAroundString (fwd_name->name);
7166         str = MemNew (sizeof (Char) * (len + 4));
7167         if (str != NULL) {
7168           StringCpy (str, "(");
7169           StringCat (str, fwd_name->name);
7170           StringCat (str, ")");
7171           fwd_name->name = MemFree (fwd_name->name);
7172           fwd_name->name = str;
7173         }
7174       }
7175     }
7176   }
7177   if (rev_name != NULL) {
7178     if (StringChr (rev_name->name, ',') != NULL) {
7179       ptr = rev_name->name;
7180       len = StringLen (ptr);
7181       if (ptr [0] != '(' || ptr [len - 1] != ')') {
7182         TrimParenthesesAndCommasAroundString (rev_name->name);
7183         str = MemNew (sizeof (Char) * (len + 4));
7184         if (str != NULL) {
7185           StringCpy (str, "(");
7186           StringCat (str, rev_name->name);
7187           StringCat (str, ")");
7188           rev_name->name = MemFree (rev_name->name);
7189           rev_name->name = str;
7190         }
7191       }
7192     }
7193   }
7194 }
7195 //LCOV_EXCL_STOP
7196 
/*
 * OrpModToOrgMod
 *
 * Scans the string ValNode list *vnpp.  Every string for which
 * StringHasOrgModPrefix reports a value, and which contains at most four
 * spaces and no commas, is turned into a new OrgMod pushed onto the front
 * of *ompp; its ValNode is removed from the list and freed.  All other
 * nodes stay in place.
 */
static void OrpModToOrgMod (ValNodePtr PNTR vnpp, OrgModPtr PNTR ompp)

{
  Int2        commas;
  CharPtr     cp;
  ValNodePtr  curr;
  ValNodePtr  following;
  OrgModPtr   mod;
  Int2        spaces;
  Uint1       subtype;
  CharPtr     text;
  CharPtr     value;

  if (vnpp == NULL || ompp == NULL) return;

  for (curr = *vnpp; curr != NULL; curr = following) {
    following = curr->next;
    text = (CharPtr) curr->data.ptrvalue;
    value = NULL;
    subtype = 0;
    StringHasOrgModPrefix (text, &value, &subtype, TRUE);

    if (value != NULL) {
      /* reject strings with more than four spaces or any comma */
      spaces = 0;
      commas = 0;
      for (cp = text; *cp != '\0'; cp++) {
        if (*cp == ' ') {
          spaces++;
        } else if (*cp == ',') {
          commas++;
        }
      }
      if (spaces > 4 || commas > 0) {
        value = NULL;
      }
    }

    if (value == NULL) {
      /* keep this node; the next removal will splice after it */
      vnpp = &(curr->next);
      continue;
    }

    mod = OrgModNew ();
    if (mod != NULL) {
      mod->subtype = (Uint1) subtype;
      mod->subname = StringSave (value);
      mod->next = *ompp;
      *ompp = mod;
    }
    *vnpp = curr->next;
    curr->next = NULL;
    ValNodeFreeData (curr);
  }
}
7254 
/*
 * StringHasSubSourcePrefix
 *
 * Tries each entry of current_subsource_subtype_alist, and then, if none
 * yielded a nonzero subtype, each alias in subsource_aliases, as a prefix
 * of str via StringHasPrefix.  The search stops at the first entry that
 * produces a nonzero subtype.
 *
 *   pval          if not NULL, receives the result of the last
 *                 StringHasPrefix call made (NULL when nothing matched)
 *   p_subtypeval  if not NULL, receives the matching subtype, or 0
 *   skippref      passed through to StringHasPrefix
 */
static void StringHasSubSourcePrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
{
  Uint1    found_subtype = 0;
  Int2     idx;
  CharPtr  match = NULL;
  Boolean  special;   /* third StringHasPrefix argument; presumably marks
                         qualifiers that carry no value - TODO confirm */

  idx = 0;
  while (current_subsource_subtype_alist [idx].name != NULL && found_subtype == 0) {
    special = (Boolean) (current_subsource_subtype_alist [idx].value == SUBSRC_germline ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_rearranged ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_transgenic ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_environmental_sample ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_metagenomic);
    match = StringHasPrefix (str, current_subsource_subtype_alist [idx].name, special, skippref);
    if (match != NULL) {
      found_subtype = current_subsource_subtype_alist [idx].value;
    }
    idx++;
  }

  if (found_subtype == 0) {
    /* fall back to the alias table */
    idx = 0;
    while (subsource_aliases [idx].name != NULL && found_subtype == 0) {
      special = (Boolean) (subsource_aliases [idx].value == SUBSRC_germline ||
                           subsource_aliases [idx].value == SUBSRC_rearranged ||
                           subsource_aliases [idx].value == SUBSRC_transgenic ||
                           subsource_aliases [idx].value == SUBSRC_environmental_sample ||
                           subsource_aliases [idx].value == SUBSRC_metagenomic);
      match = StringHasPrefix (str, subsource_aliases [idx].alias, special, skippref);
      if (match != NULL) {
        found_subtype = subsource_aliases [idx].value;
      }
      idx++;
    }
  }

  if (pval != NULL) {
    *pval = match;
  }
  if (p_subtypeval != NULL) {
    *p_subtypeval = found_subtype;
  }
}
7294 
/*
 * OrpModToSubSource
 *
 * Scans the string ValNode list *vnpp.  Every string for which
 * StringHasSubSourcePrefix reports a value, and which contains at most four
 * spaces and no commas, is turned into a new SubSource pushed onto the
 * front of *sspp; its ValNode is removed from the list and freed.  All
 * other nodes stay in place.
 */
static void OrpModToSubSource (ValNodePtr PNTR vnpp, SubSourcePtr PNTR sspp)

{
  Int2          commas;
  CharPtr       cp;
  ValNodePtr    curr;
  ValNodePtr    following;
  SubSourcePtr  sub;
  Int2          spaces;
  Uint1         subtype_val = 0;
  CharPtr       text;
  CharPtr       value;

  if (vnpp == NULL || sspp == NULL) return;

  for (curr = *vnpp; curr != NULL; curr = following) {
    following = curr->next;
    text = (CharPtr) curr->data.ptrvalue;
    value = NULL;
    subtype_val = 0;
    StringHasSubSourcePrefix (text, &value, &subtype_val, TRUE);

    if (value != NULL) {
      /* reject strings with more than four spaces or any comma */
      spaces = 0;
      commas = 0;
      for (cp = text; *cp != '\0'; cp++) {
        if (*cp == ' ') {
          spaces++;
        } else if (*cp == ',') {
          commas++;
        }
      }
      if (spaces > 4 || commas > 0) {
        value = NULL;
      }
    }

    if (value == NULL) {
      /* keep this node; the next removal will splice after it */
      vnpp = &(curr->next);
      continue;
    }

    sub = SubSourceNew ();
    if (sub != NULL) {
      sub->subtype = subtype_val;
      sub->name = StringSave (value);
      sub->next = *sspp;
      *sspp = sub;
    }
    *vnpp = curr->next;
    curr->next = NULL;
    ValNodeFreeData (curr);
  }
}
7353 
/*
 * GbqualToOrpMod
 *
 * Walks the GBQual list *prevgbq.  For every qualifier whose name is
 * recognized by StringHasOrgModPrefix or, failing that, by
 * StringHasSubSourcePrefix, a string "<name>=<value>" is built and added
 * to *vnpp with ValNodeAddStr (which presumably takes ownership of the
 * buffer - confirm), and the qualifier is unlinked and freed.
 * Unrecognized qualifiers, and recognized ones for which the buffer cannot
 * be allocated, are left in the list.
 */
static void GbqualToOrpMod (GBQualPtr PNTR prevgbq, ValNodePtr PNTR vnpp)

{
  CharPtr    buf;
  GBQualPtr  gbq;
  size_t     len;
  GBQualPtr  next;
  CharPtr    str;
  Boolean    unlink;
  CharPtr    val;
  Uint1      subtype_val;

  if (prevgbq == NULL) return;
  gbq = *prevgbq;
  while (gbq != NULL) {
    next = gbq->next;
    unlink = FALSE;
    str = gbq->qual;
    if (str != NULL) {
      val = NULL;
      subtype_val = 0;
      StringHasOrgModPrefix (str, &val, &subtype_val, FALSE);
      if (val == NULL) {
        subtype_val = 0;
        StringHasSubSourcePrefix (str, &val, &subtype_val, FALSE);
      }
      if (val != NULL) {
        /* size the buffer from both parts plus '=' and the terminator
           rather than assuming the name fits in a fixed amount of slack */
        len = StringLen (val) + StringLen (gbq->val) + 2;
        buf = MemNew (sizeof (Char) * len);
        if (buf != NULL) {
          StringCpy (buf, val);
          StringCat (buf, "=");
          StringCat (buf, gbq->val);
          ValNodeAddStr (vnpp, 0, buf);
          unlink = TRUE;
        }
      }
    }
    if (unlink) {
      *prevgbq = gbq->next;
      gbq->next = NULL;
      GBQualFree (gbq);
    } else {
      prevgbq = (GBQualPtr PNTR) &(gbq->next);
    }
    gbq = next;
  }
}
7402 
7403 #define IS_WHITESP(c) (((c) == ' ') || ((c) == '\n') || ((c) == '\r') || ((c) == '\t'))
7404 
IsStringSingleToken(CharPtr str)7405 static Boolean IsStringSingleToken (CharPtr str)
7406 
7407 {
7408   Char  ch;
7409 
7410   if (StringHasNoText (str)) return FALSE;
7411 
7412   ch = *str;
7413   while (ch != '\0') {
7414     if (IS_WHITESP (ch)) return FALSE;
7415     str++;
7416     ch = *str;
7417   }
7418 
7419   return TRUE;
7420 }
7421 
FindAnOrgMod(OrgNamePtr onp,Uint1 subtype)7422 static CharPtr FindAnOrgMod (OrgNamePtr onp, Uint1 subtype)
7423 
7424 {
7425   OrgModPtr  omp;
7426 
7427   if (onp == NULL || subtype == 0) return NULL;
7428 
7429   for (omp = onp->mod; omp != NULL; omp = omp->next) {
7430     if (omp->subtype != subtype) continue;
7431     if (StringHasNoText (omp->subname)) continue;
7432     return omp->subname;
7433   }
7434 
7435   return NULL;
7436 }
7437 
FindASubSource(BioSourcePtr biop,Uint1 subtype)7438 static CharPtr FindASubSource (BioSourcePtr biop, Uint1 subtype)
7439 
7440 {
7441   SubSourcePtr  ssp;
7442 
7443   if (biop == NULL || subtype == 0) return NULL;
7444 
7445   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
7446     if (ssp->subtype != subtype) continue;
7447     if (StringHasNoText (ssp->name)) continue;
7448     return ssp->name;
7449   }
7450 
7451   return NULL;
7452 }
7453 
/*
 * FindNextSingleTilde
 *
 * Returns a pointer to the first '~' in str that is neither immediately
 * followed by another '~' nor part of a run of tildes that was already
 * skipped.  A run of tildes directly preceded by a space is skipped
 * entirely, even when it is a single tilde.  Returns NULL when str has no
 * text or no such tilde exists.
 */
static CharPtr FindNextSingleTilde (CharPtr str)

{
  CharPtr  cp;

  if (StringHasNoText (str)) return NULL;

  cp = str;
  while (*cp != '\0') {
    if (*cp == ' ' && cp [1] == '~') {
      /* space-prefixed tildes are never separators; step onto the run */
      cp++;
    } else if (*cp == '~') {
      if (cp [1] != '~') return cp;
    } else {
      cp++;
      continue;
    }
    /* cp is on the first tilde of a run that must be skipped as a whole */
    while (*cp == '~') {
      cp++;
    }
  }

  return NULL;
}
7491 
SplitAtSingleTilde(CharPtr strs)7492 static ValNodePtr SplitAtSingleTilde (CharPtr strs)
7493 
7494 {
7495   ValNodePtr  head = NULL;
7496   CharPtr     ptr, str, tmp;
7497 
7498   if (StringHasNoText (strs)) return NULL;
7499 
7500   tmp = StringSave (strs);
7501   str = tmp;
7502 
7503   while (StringDoesHaveText (str)) {
7504     ptr = FindNextSingleTilde (str);
7505     if (ptr != NULL) {
7506       *ptr = '\0';
7507       ptr++;
7508     }
7509     TrimSpacesAroundString (str);
7510     ValNodeCopyStr (&head, 0, str);
7511     str = ptr;
7512   }
7513 
7514   MemFree (tmp);
7515   return head;
7516 }
7517 
MergeTildeStrings(ValNodePtr head)7518 static CharPtr MergeTildeStrings (ValNodePtr head)
7519 
7520 {
7521   size_t      len = 0;
7522   CharPtr     prefix = "", ptr, str;
7523   ValNodePtr  vnp;
7524 
7525   if (head == NULL) return NULL;
7526 
7527   for (vnp = head; vnp != NULL; vnp = vnp->next) {
7528     str = (CharPtr) vnp->data.ptrvalue;
7529     if (StringHasNoText (str)) continue;
7530     len += StringLen (str) + 1;
7531   }
7532   if (len < 1) return NULL;
7533 
7534   ptr = MemNew (sizeof (Char) * (len + 2));
7535   if (ptr == NULL) return NULL;
7536 
7537   for (vnp = head; vnp != NULL; vnp = vnp->next) {
7538     str = (CharPtr) vnp->data.ptrvalue;
7539     if (StringHasNoText (str)) continue;
7540     StringCat (ptr, prefix);
7541     StringCat (ptr, str);
7542     prefix = "~";
7543   }
7544 
7545   return ptr;
7546 }
7547 
7548 
/*
 * CleanupOrgModOther
 *
 * For every ORGMOD_other entry of onp->mod, splits the note at single
 * tildes and drops each piece that merely repeats, as "<qualifier> <value>"
 * recognized by StringHasOrgModPrefix / StringHasSubSourcePrefix, a value
 * already present (case-insensitively) in an OrgMod of onp or a SubSource
 * of biop.  The remaining pieces are rejoined with '~'; an OrgMod whose
 * note ends up empty is unlinked and freed.  Nothing happens if biop or
 * onp is NULL.
 */
static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)

{
  ValNodePtr      head, vnp;
  OrgModPtr       next;
  OrgModPtr       omp;
  OrgModPtr PNTR  prev;
  CharPtr         str;
  Uint1           subtype_val;
  CharPtr         tmp;
  Boolean         unlink;
  CharPtr         val;

  if (biop == NULL || onp == NULL) return;

  prev = &(onp->mod);
  omp = onp->mod;
  while (omp != NULL) {
    next = omp->next;
    unlink = FALSE;
    if (omp->subtype == ORGMOD_other) {
      str = omp->subname;
      head = SplitAtSingleTilde (str);
      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        val = NULL;
        subtype_val = 0;
        StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
        if (val != NULL) {
          tmp = FindAnOrgMod (onp, subtype_val);
          if (tmp != NULL && StringICmp (tmp, val) == 0) {
            /* redundant piece: free the node's own copy instead of just
               dropping the pointer, which would leak it */
            vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          }
        } else {
          subtype_val = 0;
          StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
          if (val != NULL) {
            tmp = FindASubSource (biop, subtype_val);
            if (tmp != NULL && StringICmp (tmp, val) == 0) {
              vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
            }
          }
        }
      }
      /* rebuild the note from the surviving pieces */
      str = MergeTildeStrings (head);
      ValNodeFreeData (head);
      omp->subname = MemFree (omp->subname);
      omp->subname = str;
      if (StringHasNoText (str)) {
        unlink = TRUE;
      }
    } else if (omp->subtype == ORGMOD_bio_material
               || omp->subtype == ORGMOD_culture_collection
               || omp->subtype == ORGMOD_specimen_voucher) {
      /* voucher cleanup (FixOrgModVoucher) is currently disabled */
    }
    if (unlink) {
      *prev = omp->next;
      omp->next = NULL;
      OrgModFree (omp);
    } else {
      prev = &(omp->next);
    }
    omp = next;
  }
}
7618 
/*
 * CleanupSubSourceOther
 *
 * For every SUBSRC_other entry of biop->subtype, splits the note at single
 * tildes and drops each piece that merely repeats, as "<qualifier> <value>"
 * recognized by StringHasOrgModPrefix / StringHasSubSourcePrefix, a value
 * already present (case-insensitively) in an OrgMod of onp or a SubSource
 * of biop.  The remaining pieces are rejoined with '~'; a SubSource whose
 * note ends up empty is unlinked and freed.
 *
 * onp may be NULL, in which case FindAnOrgMod finds nothing and only the
 * SubSource comparison can remove a piece.  Nothing happens if biop is
 * NULL.
 */
static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp)

{
  ValNodePtr         head, vnp;
  SubSourcePtr       next;
  SubSourcePtr PNTR  prev;
  SubSourcePtr       ssp;
  CharPtr            str;
  Uint1              subtype_val;
  CharPtr            tmp;
  Boolean            unlink;
  CharPtr            val;

  if (biop == NULL) return;

  prev = &(biop->subtype);
  ssp = biop->subtype;
  while (ssp != NULL) {
    next = ssp->next;
    unlink = FALSE;
    if (ssp->subtype == SUBSRC_other) {
      str = ssp->name;
      head = SplitAtSingleTilde (str);
      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        val = NULL;
        subtype_val = 0;
        StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
        if (val != NULL) {
          tmp = FindAnOrgMod (onp, subtype_val);
          if (tmp != NULL && StringICmp (tmp, val) == 0) {
            /* redundant piece: free the node's own copy instead of just
               dropping the pointer, which would leak it */
            vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          }
        } else {
          subtype_val = 0;
          StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
          if (val != NULL) {
            tmp = FindASubSource (biop, subtype_val);
            if (tmp != NULL && StringICmp (tmp, val) == 0) {
              vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
            }
          }
        }
      }
      /* rebuild the note from the surviving pieces */
      str = MergeTildeStrings (head);
      ValNodeFreeData (head);
      ssp->name = MemFree (ssp->name);
      ssp->name = str;
      if (StringHasNoText (str)) {
        unlink = TRUE;
      }
    }
    if (unlink) {
      *prev = ssp->next;
      ssp->next = NULL;
      SubSourceFree (ssp);
    } else {
      prev = &(ssp->next);
    }
    ssp = next;
  }
}
7682 
SortDbxref(VoidPtr ptr1,VoidPtr ptr2)7683 static int LIBCALLBACK SortDbxref (VoidPtr ptr1, VoidPtr ptr2)
7684 
7685 {
7686   int          compare;
7687   DbtagPtr     dbt1;
7688   DbtagPtr     dbt2;
7689   ObjectIdPtr  oip1;
7690   ObjectIdPtr  oip2;
7691   CharPtr      str1;
7692   CharPtr      str2;
7693   ValNodePtr   vnp1;
7694   ValNodePtr   vnp2;
7695 
7696   if (ptr1 == NULL || ptr2 == NULL) return 0;
7697   vnp1 = *((ValNodePtr PNTR) ptr1);
7698   vnp2 = *((ValNodePtr PNTR) ptr2);
7699   if (vnp1 == NULL || vnp2 == NULL) return 0;
7700   dbt1 = (DbtagPtr) vnp1->data.ptrvalue;
7701   dbt2 = (DbtagPtr) vnp2->data.ptrvalue;
7702   if (dbt1 == NULL || dbt2 == NULL) return 0;
7703   str1 = (CharPtr) dbt1->db;
7704   str2 = (CharPtr) dbt2->db;
7705   if (str1 == NULL || str2 == NULL) return 0;
7706   compare = StringICmp (str1, str2);
7707   if (compare != 0) return compare;
7708   oip1 = dbt1->tag;
7709   oip2 = dbt2->tag;
7710   if (oip1 == NULL || oip2 == NULL) return 0;
7711   str1 = oip1->str;
7712   str2 = oip2->str;
7713   if (str1 != NULL && str2 != NULL) {
7714     return StringICmp (str1, str2);
7715   } else if (str1 == NULL && str2 == NULL) {
7716     if (oip1->id > oip2->id) {
7717       return 1;
7718     } else if (oip1->id < oip2->id) {
7719       return -1;
7720     }
7721   } else if (str1 != NULL) {
7722     return 1;
7723   } else if (str2 != NULL) {
7724     return -1;
7725   }
7726   return 0;
7727 }
7728 
/*
 * FixNumericDbxref
 *
 * Converts a string dbxref tag into an integer tag when the string does
 * not start with '0', is all digits, and is no greater than 2147483647.
 * On conversion the value is stored in oip->id and the string is freed.
 * Anything else is left untouched.
 */
static void FixNumericDbxref (DbtagPtr dbt)

{
  size_t       ndigits;
  long         number;
  ObjectIdPtr  tag;
  CharPtr      text;

  if (dbt == NULL) return;
  tag = dbt->tag;
  if (tag == NULL) return;

  text = tag->str;
  if (text == NULL || *text == '0' || (! StringIsAllDigits (text))) return;

  /* with equal length and only digits, string order equals numeric order */
  ndigits = StringLen (text);
  if (ndigits > 10) return;
  if (ndigits == 10 && StringCmp (text, "2147483647") > 0) return;

  if (sscanf (tag->str, "%ld", &number) != 1) return;

  tag->id = (Int4) number;
  tag->str = MemFree (tag->str);
}
7753 
/*
 * FixNumericDbxrefs
 *
 * Applies FixNumericDbxref to the Dbtag held by each node of the list,
 * skipping nodes whose data pointer is NULL.
 */
static void FixNumericDbxrefs (ValNodePtr vnp)

{
  ValNodePtr  node;
  DbtagPtr    tag;

  for (node = vnp; node != NULL; node = node->next) {
    tag = (DbtagPtr) node->data.ptrvalue;
    if (tag == NULL) continue;
    FixNumericDbxref (tag);
  }
}
7767 
FixOldDbxref(DbtagPtr dbt)7768 static void FixOldDbxref (DbtagPtr dbt)
7769 
7770 {
7771   Boolean      all_digits;
7772   Char         buf [32];
7773   Char         ch;
7774   CharPtr      ident;
7775   size_t       len;
7776   ObjectIdPtr  oip;
7777   CharPtr      ptr;
7778   CharPtr      str;
7779 
7780   if (dbt != NULL) {
7781 
7782     TrimSpacesAroundString (dbt->db);
7783     oip = dbt->tag;
7784     if (oip != NULL && oip->str != NULL) {
7785       /*
7786       TrimSpacesAroundString (oip->str);
7787       */
7788       TrimSpacesSemicolonsAndCommas (oip->str);
7789     }
7790 
7791     if (StringICmp (dbt->db, "SWISS-PROT") == 0 &&
7792         StringCmp (dbt->db, "Swiss-Prot") != 0) {
7793       dbt->db = MemFree (dbt->db);
7794       dbt->db = StringSave ("Swiss-Prot");
7795     } else if (StringICmp (dbt->db, "SPTREMBL") == 0) {
7796       dbt->db = MemFree (dbt->db);
7797       dbt->db = StringSave ("TrEMBL");
7798     } else if (StringICmp (dbt->db, "SUBTILIS") == 0) {
7799       dbt->db = MemFree (dbt->db);
7800       dbt->db = StringSave ("SubtiList");
7801     } else if (StringICmp (dbt->db, "MGD") == 0) {
7802       dbt->db = MemFree (dbt->db);
7803       dbt->db = StringSave ("MGI");
7804     } else if (StringCmp (dbt->db, "cdd") == 0) {
7805       dbt->db = MemFree (dbt->db);
7806       dbt->db = StringSave ("CDD");
7807     } else if (StringCmp (dbt->db, "FlyBase") == 0) {
7808       dbt->db = MemFree (dbt->db);
7809       dbt->db = StringSave ("FLYBASE");
7810     } else if (StringCmp (dbt->db, "GENEDB") == 0) {
7811       dbt->db = MemFree (dbt->db);
7812       dbt->db = StringSave ("GeneDB");
7813     } else if (StringCmp (dbt->db, "GreengenesID") == 0) {
7814       dbt->db = MemFree (dbt->db);
7815       dbt->db = StringSave ("Greengenes");
7816     } else if (StringCmp (dbt->db, "HMPID") == 0) {
7817       dbt->db = MemFree (dbt->db);
7818       dbt->db = StringSave ("HMP");
7819     }
7820     if (StringICmp (dbt->db, "HPRD") == 0) {
7821       oip = dbt->tag;
7822       if (oip != NULL && StringDoesHaveText (oip->str)) {
7823         str = oip->str;
7824         if (str != NULL && StringNICmp (str, "HPRD_", 5) == 0) {
7825           str [0] = ' ';
7826           str [1] = ' ';
7827           str [2] = ' ';
7828           str [3] = ' ';
7829           str [4] = ' ';
7830           TrimSpacesAroundString (str);
7831         }
7832       }
7833     } else if (StringICmp (dbt->db, "MGI") == 0) {
7834       oip = dbt->tag;
7835       if (oip != NULL && oip->str != NULL && StringDoesHaveText (oip->str)) {
7836         str = oip->str;
7837         if (StringNICmp (str, "MGI:", 4) == 0 || StringNICmp (str, "MGD:", 4) == 0) {
7838           str [0] = ' ';
7839           str [1] = ' ';
7840           str [2] = ' ';
7841           str [3] = ' ';
7842           TrimSpacesAroundString (str);
7843         } else if (StringNICmp (str, "J:", 2) == 0) {
7844           ptr = str + 2;
7845           ch = *ptr;
7846           all_digits = TRUE;
7847           while (ch != '\0') {
7848             if (! IS_DIGIT (ch)) {
7849               all_digits = FALSE;
7850             }
7851             ptr++;
7852             ch = *ptr;
7853           }
7854           if (all_digits) {
7855             oip->str = MemFree (oip->str);
7856             oip->str = StringSave ("");
7857           }
7858         }
7859       }
7860     }
7861     if (StringICmp (dbt->db, "Swiss-Prot") == 0 ||
7862         StringICmp (dbt->db, "SWISSPROT") == 0) {
7863       dbt->db = MemFree (dbt->db);
7864       dbt->db = StringSave ("UniProt/Swiss-Prot");
7865     } else if (StringICmp (dbt->db, "TrEMBL") == 0) {
7866       dbt->db = MemFree (dbt->db);
7867       dbt->db = StringSave ("UniProt/TrEMBL");
7868     } else if (StringICmp (dbt->db, "LocusID") == 0) {
7869       dbt->db = MemFree (dbt->db);
7870       dbt->db = StringSave ("GeneID");
7871     } else if (StringICmp (dbt->db, "MaizeDB") == 0) {
7872       dbt->db = MemFree (dbt->db);
7873       dbt->db = StringSave ("MaizeGDB");
7874     }
7875     if (StringICmp (dbt->db, "UniProt/Swiss-Prot") == 0) {
7876       dbt->db = MemFree (dbt->db);
7877       dbt->db = StringSave ("UniProtKB/Swiss-Prot");
7878     } else if (StringICmp (dbt->db, "UniProt/TrEMBL") == 0) {
7879       dbt->db = MemFree (dbt->db);
7880       dbt->db = StringSave ("UniProtKB/TrEMBL");
7881     } else if (StringICmp (dbt->db, "Genew") == 0) {
7882       dbt->db = MemFree (dbt->db);
7883       dbt->db = StringSave ("HGNC");
7884     } else if (StringICmp (dbt->db, "IFO") == 0) {
7885       dbt->db = MemFree (dbt->db);
7886       dbt->db = StringSave ("NBRC");
7887     } else if (StringICmp (dbt->db, "BHB") == 0 ||
7888         StringICmp (dbt->db, "BioHealthBase") == 0) {
7889       dbt->db = MemFree (dbt->db);
7890       dbt->db = StringSave ("IRD");
7891     }
7892 
7893     oip = dbt->tag;
7894     if (oip != NULL && oip->str != NULL) {
7895       ident = oip->str;
7896       if (StringCmp (dbt->db, "HGNC") == 0 && StringNCmp (ident, "HGNC:", 5) == 0 ) {
7897         ident += 5;
7898         ptr = StringSave (ident);
7899         oip->str = MemFree (oip->str);
7900         oip->str = ptr;
7901       } else if (StringCmp (dbt->db, "VGNC") == 0 && StringNCmp (ident, "VGNC:", 5) == 0 ) {
7902         ident += 5;
7903         ptr = StringSave (ident);
7904         oip->str = MemFree (oip->str);
7905         oip->str = ptr;
7906       } else if (StringCmp (dbt->db, "MGI") == 0 && StringNCmp (ident, "MGI:", 4) == 0 ) {
7907         ident += 4;
7908         ptr = StringSave (ident);
7909         oip->str = MemFree (oip->str);
7910         oip->str = ptr;
7911       } else if (StringCmp (dbt->db, "RGD") == 0 && StringNCmp (ident, "RGD:", 4) == 0 ) {
7912         ident += 4;
7913         ptr = StringSave (ident);
7914         oip->str = MemFree (oip->str);
7915         oip->str = ptr;
7916       }
7917     }
7918     if (oip != NULL) {
7919       if (StringCmp (dbt->db, "HGNC") == 0 || StringCmp (dbt->db, "VGNC") == 0 || StringCmp (dbt->db, "MGI") == 0) {
7920         if (oip->str == NULL && oip->id > 0) {
7921           sprintf (buf, "%ld", (long) oip->id);
7922           ptr = StringSave (buf);
7923           oip->id = 0;
7924           oip->str = ptr;
7925         }
7926         ident = oip->str;
7927         if (ident != NULL) {
7928           if (StringChr (ident, ':') == NULL) {
7929             len = StringLen (dbt->db) + StringLen (ident) + 5;
7930             ptr = (CharPtr) MemNew (sizeof (Char) * len);
7931             if (ptr != NULL) {
7932               sprintf (ptr, "%s:%s", dbt->db, ident);
7933               oip->str = MemFree (oip->str);
7934               oip->str = ptr;
7935             }
7936           }
7937         }
7938       }
7939     }
7940   }
7941 }
7942 
/*
 * FixOldDbxrefs
 *
 * Runs FixOldDbxref on every Dbtag in the chain.  Unless isEmblOrDdbj is
 * set, a string tag containing ':' (for databases other than HGNC, VGNC
 * and MGI) is then split at the first colon: the existing db_xref keeps
 * the text before the colon, and, when there is text after it, a new
 * db_xref with the same database name and the remaining text is inserted
 * right after the current node.  The inserted node is visited by the next
 * loop iteration, so further colons are split the same way.
 */
static void FixOldDbxrefs (ValNodePtr vnp, Boolean isEmblOrDdbj)

{
  CharPtr      colon;
  DbtagPtr     curr;
  CharPtr      dbname;
  DbtagPtr     extra;
  ObjectIdPtr  extratag;
  ValNodePtr   inserted;
  ObjectIdPtr  tag;

  for (; vnp != NULL; vnp = vnp->next) {
    curr = (DbtagPtr) vnp->data.ptrvalue;
    if (curr == NULL) continue;

    FixOldDbxref (curr);

    if (isEmblOrDdbj) continue;
    if (StringCmp (curr->db, "HGNC") == 0 ||
        StringCmp (curr->db, "VGNC") == 0 ||
        StringCmp (curr->db, "MGI") == 0) continue;

    /* expand db_xrefs with colons inside tags */
    tag = curr->tag;
    if (tag == NULL || tag->str == NULL) continue;
    colon = StringChr (tag->str, ':');
    if (colon == NULL) continue;

    if (StringHasNoText (colon + 1)) {
      /* nothing useful after the colon, just cut it off */
      *colon = '\0';
      continue;
    }

    dbname = curr->db;
    /* NOTE(review): extra and extratag are not released if a later allocation fails */
    extra = DbtagNew ();
    if (extra == NULL) continue;
    extratag = ObjectIdNew ();
    if (extratag == NULL) continue;
    inserted = ValNodeNew (NULL);
    if (inserted == NULL) continue;

    *colon = '\0';
    colon++;
    TrimSpacesAroundString (colon);
    extra->db = StringSave (dbname);
    extratag->str = StringSave (colon);
    extra->tag = extratag;
    inserted->data.ptrvalue = (Pointer) extra;
    inserted->next = vnp->next;
    vnp->next = inserted;
  }
}
7996 
/*
 * CleanupDuplicateDbxrefs
 *
 * Removes a db_xref when it repeats the most recently kept one: same
 * database name (case-insensitive) and either the same string tag
 * (case-insensitive) or, when neither has a string tag, the same integer id.
 * Only the previously kept entry is compared, so the caller is
 * presumably expected to have sorted the list first.
 *
 * prevvnp is the address of the link that points at the first node; it is
 * updated through as nodes are removed, so the head of the list may change.
 *
 * Nodes without a Dbtag are left in the list.  The link pointer is now
 * advanced past them as well; previously it was not, so removing the
 * duplicate that followed such a node also dropped that node from the
 * list without freeing it.
 */
static void CleanupDuplicateDbxrefs (ValNodePtr PNTR prevvnp)

{
  DbtagPtr     dbt;
  DbtagPtr     last = NULL;
  ValNodePtr   nextvnp;
  ObjectIdPtr  oip1;
  ObjectIdPtr  oip2;
  Boolean      unlink;
  ValNodePtr   vnp;

  if (prevvnp == NULL) return;
  vnp = *prevvnp;
  while (vnp != NULL) {
    nextvnp = vnp->next;
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    unlink = FALSE;
    if (dbt != NULL && last != NULL &&
        dbt->db != NULL && last->db != NULL &&
        StringICmp (dbt->db, last->db) == 0) {
      oip1 = dbt->tag;
      oip2 = last->tag;
      if (oip1 != NULL && oip2 != NULL) {
        if (oip1->str != NULL && oip2->str != NULL) {
          if (StringICmp (oip1->str, oip2->str) == 0) {
            unlink = TRUE;
          }
        } else if (oip1->str == NULL && oip2->str == NULL) {
          if (oip1->id == oip2->id) {
            unlink = TRUE;
          }
        }
      }
    }
    if (unlink) {
      *prevvnp = vnp->next;
      vnp->next = NULL;
      DbtagFree (dbt);
      ValNodeFree (vnp);
    } else {
      if (dbt != NULL) {
        last = dbt;
      }
      /* always step the link pointer, even over nodes with no Dbtag */
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
    }
    vnp = nextvnp;
  }
}
8053 
/*
 * CleanupObsoleteDbxrefs
 *
 * Removes and frees db_xrefs that are obsolete or empty:
 *   - database name with no text, or equal (case-insensitive) to
 *     "PID", "PIDg" or "NID";
 *   - no tag at all, a string tag with no text, or no string tag and an
 *     integer id of 0.
 * PIDe, PIDd and GI are not removed (their tests were disabled in the
 * original code).
 *
 * prevvnp is the address of the link that points at the first node; it is
 * updated through as nodes are removed, so the head of the list may change.
 *
 * Nodes without a Dbtag are left in the list.  The link pointer is now
 * advanced past them as well; previously it was not, so removing the
 * entry that followed such a node also dropped that node from the list
 * without freeing it.
 */
static void CleanupObsoleteDbxrefs (ValNodePtr PNTR prevvnp)

{
  DbtagPtr     dbt;
  ValNodePtr   nextvnp;
  ObjectIdPtr  oip;
  CharPtr      str;
  Boolean      unlink;
  ValNodePtr   vnp;

  if (prevvnp == NULL) return;
  vnp = *prevvnp;
  while (vnp != NULL) {
    nextvnp = vnp->next;
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    unlink = FALSE;
    if (dbt != NULL) {
      str = (CharPtr) dbt->db;
      if (StringHasNoText (str) ||
          StringICmp (str, "PID") == 0 ||
          StringICmp (str, "PIDg") == 0 ||
          StringICmp (str, "NID") == 0) {
        unlink = TRUE;
      }
      oip = dbt->tag;
      if (oip == NULL) {
        unlink = TRUE;
      } else if (oip->str != NULL) {
        if (StringHasNoText (oip->str)) {
          unlink = TRUE;
        }
      } else if (oip->id == 0) {
        unlink = TRUE;
      }
    }
    if (unlink) {
      *prevvnp = vnp->next;
      vnp->next = NULL;
      DbtagFree (dbt);
      ValNodeFree (vnp);
    } else {
      /* always step the link pointer, even over nodes with no Dbtag */
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
    }
    vnp = nextvnp;
  }
}
8107 
/*
 * CleanupGoDbxrefs
 *
 * For every db_xref whose database is "GO" (case-insensitive):
 *   - a positive integer id with no string tag is converted to its
 *     decimal string form and the id is reset to 0;
 *   - an all-digit string tag shorter than 7 characters is left-padded
 *     with zeros to 7 characters.
 */
static void CleanupGoDbxrefs (ValNodePtr vnp)

{
  size_t       digits;
  Char         padded [32];
  ObjectIdPtr  tag;
  DbtagPtr     xref;

  for (; vnp != NULL; vnp = vnp->next) {
    xref = (DbtagPtr) vnp->data.ptrvalue;
    if (xref == NULL) continue;
    if (StringICmp (xref->db, "GO") != 0) continue;
    tag = xref->tag;
    if (tag == NULL) continue;

    if (tag->str == NULL && tag->id > 0) {
      sprintf (padded, "%ld", (long) tag->id);
      tag->str = StringSave (padded);
      tag->id = 0;
    }

    if (tag->str == NULL || ! StringIsAllDigits (tag->str)) continue;
    digits = StringLen (tag->str);
    if (digits >= 7) continue;

    /* keep just enough leading zeros, then append the existing digits */
    StringCpy (padded, "0000000");
    padded [7 - digits] = '\0';
    StringCat (padded, tag->str);
    tag->str = MemFree (tag->str);
    tag->str = StringSave (padded);
  }
}
8147 
SortCits(VoidPtr ptr1,VoidPtr ptr2)8148 static int LIBCALLBACK SortCits (VoidPtr ptr1, VoidPtr ptr2)
8149 
8150 {
8151   int         compare;
8152   Char        label1 [128], label2 [128];
8153   ValNodePtr  ppr1, ppr2;
8154 
8155   if (ptr1 == NULL || ptr2 == NULL) return 0;
8156   ppr1 = *((ValNodePtr PNTR) ptr1);
8157   ppr2 = *((ValNodePtr PNTR) ptr2);
8158   if (ppr1 == NULL || ppr2 == NULL) return 0;
8159   PubLabel (ppr1, label1, 127, OM_LABEL_CONTENT);
8160   PubLabel (ppr2, label2, 127, OM_LABEL_CONTENT);
8161   compare = StringICmp (label1, label2);
8162   return compare;
8163 }
8164 
CitGenTitlesMatch(ValNodePtr pub1,ValNodePtr pub2)8165 static Boolean CitGenTitlesMatch (ValNodePtr pub1, ValNodePtr pub2)
8166 
8167 {
8168   CitGenPtr  cgp1, cgp2;
8169 
8170   if (pub1->choice == PUB_Gen) {
8171     cgp1 = (CitGenPtr) pub1->data.ptrvalue;
8172     if (cgp1->serial_number != -1 && pub1->next != NULL) {
8173       pub1 = pub1->next;
8174     }
8175   }
8176   if (pub2->choice == PUB_Gen) {
8177     cgp2 = (CitGenPtr) pub2->data.ptrvalue;
8178     if (cgp2->serial_number != -1 && pub2->next != NULL) {
8179       pub2 = pub2->next;
8180     }
8181   }
8182 
8183   if (pub1->choice != PUB_Gen || pub2->choice != PUB_Gen) return TRUE;
8184   cgp1 = (CitGenPtr) pub1->data.ptrvalue;
8185   cgp2 = (CitGenPtr) pub2->data.ptrvalue;
8186   if (cgp1->title == NULL || cgp2->title == NULL) return TRUE;
8187   if (StringCmp (cgp1->title, cgp2->title) != 0) return FALSE;
8188   return TRUE;
8189 }
8190 
/*
 * CleanupDuplicateCits
 *
 * Removes a publication from the chain when it has the same unique label
 * as the most recently kept publication and CitGenTitlesMatch agrees.
 * Removed entries are released with PubFree.
 *
 * prevvnp is the address of the link that points at the first node; it is
 * updated through as nodes are removed.
 */
static void CleanupDuplicateCits (ValNodePtr PNTR prevvnp)

{
  ValNodePtr  curr;
  ValNodePtr  following;
  Boolean     is_dup;
  ValNodePtr  kept = NULL;
  Char        kept_label [128];
  Char        curr_label [128];

  if (prevvnp == NULL) return;

  for (curr = *prevvnp; curr != NULL; curr = following) {
    following = curr->next;
    is_dup = FALSE;
    if (kept != NULL) {
      PubLabelUnique (kept, kept_label, 127, OM_LABEL_CONTENT, TRUE);
      PubLabelUnique (curr, curr_label, 127, OM_LABEL_CONTENT, TRUE);
      if (StringCmp (kept_label, curr_label) == 0 && CitGenTitlesMatch (kept, curr)) {
        is_dup = TRUE;
      }
    }
    if (is_dup) {
      *prevvnp = curr->next;
      curr->next = NULL;
      PubFree (curr);
    } else {
      kept = curr;
      prevvnp = (ValNodePtr PNTR) &(curr->next);
    }
  }
}
8225 
8226 /* name processing code from Sequin editors */
8227 
FirstNameToInitials(CharPtr first,CharPtr inits,size_t maxsize)8228 NLM_EXTERN void FirstNameToInitials (CharPtr first, CharPtr inits, size_t maxsize)
8229 
8230 {
8231   Char  ch;
8232   Uint2  i;
8233 
8234   if (inits != NULL && maxsize > 0) {
8235     inits [0] = '\0';
8236     if (first != NULL) {
8237       i = 0;
8238       ch = *first;
8239       while (ch != '\0' && i < maxsize) {
8240         while (ch != '\0' && (ch <= ' ' || ch == '-')) {
8241           first++;
8242           ch = *first;
8243         }
8244         if (IS_ALPHA (ch)) {
8245           inits [i] = ch;
8246           i++;
8247           first++;
8248           ch = *first;
8249         }
8250         while (ch != '\0' && ch > ' ' && ch != '-') {
8251           first++;
8252           ch = *first;
8253         }
8254         if (ch == '-') {
8255           inits [i] = ch;
8256           i++;
8257           first++;
8258           ch = *first;
8259         }
8260       }
8261       inits [i] = '\0';
8262     }
8263   }
8264 }
8265 
/* Removes every '.' from str in place; does nothing when str is NULL. */
static void StripPeriods (CharPtr str)

{
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL) return;

  wr = str;
  for (rd = str; *rd != '\0'; rd++) {
    if (*rd == '.') continue;
    *wr = *rd;
    wr++;
  }
  *wr = '\0';
}
8286 
/*
 * Removes leading characters that compare <= ' ' from str by shifting
 * the rest of the string to the front.  NULL and empty strings are
 * left alone.
 */
static void TrimLeadingSpaces (CharPtr str)

{
  CharPtr  dst;
  CharPtr  src;

  if (str == NULL || str [0] == '\0') return;

  src = str;
  while (*src != '\0' && *src <= ' ') {
    src++;
  }

  dst = str;
  while (*src != '\0') {
    *dst = *src;
    dst++;
    src++;
  }
  *dst = '\0';
}
8309 
/*
 * ExtractSuffixFromInitials
 *
 * When the initials string (names [4]) contains a period and ends in one
 * of the recognized endings, the ending is cut off the initials and the
 * corresponding suffix is stored in names [5].  The ending must be
 * preceded by at least one other character.  "2nd" is stored as "II".
 *
 * The caller must ensure that nsp and nsp->names [4] are non-NULL;
 * names [5] is overwritten without being freed.
 */
static void ExtractSuffixFromInitials (NameStdPtr nsp)

{
  /* endings are tried in this order; the first match wins */
  static CharPtr  endings []  = {"III", "III.", "Jr", "2nd", "IV", "IV."};
  static CharPtr  suffixes [] = {"III", "III",  "Jr", "II",  "IV", "IV"};

  CharPtr  cp;
  Boolean  found_period;
  Int2     idx;
  CharPtr  inits;
  size_t   inits_len;
  size_t   tail_len;

  inits = nsp->names [4];

  found_period = FALSE;
  for (cp = inits; *cp != '\0'; cp++) {
    if (*cp == '.') {
      found_period = TRUE;
      break;
    }
  }
  if (! found_period) return;

  inits_len = StringLen (inits);
  for (idx = 0; idx < 6; idx++) {
    tail_len = StringLen (endings [idx]);
    if (inits_len < tail_len + 1) continue;
    if (StringCmp (inits + inits_len - tail_len, endings [idx]) != 0) continue;
    inits [inits_len - tail_len] = '\0';
    nsp->names [5] = StringSave (suffixes [idx]);
    return;
  }
}
8350 
NameStdPtrToTabbedString(NameStdPtr nsp,Boolean fixInitials)8351 static CharPtr NameStdPtrToTabbedString (NameStdPtr nsp, Boolean fixInitials)
8352 
8353 {
8354   Char   first [256];
8355   Char   frstinits [64];
8356   Char   initials [64];
8357   Int2   j;
8358   Char   last [256];
8359   Char   middle [128];
8360   Char   str [512];
8361   Char   suffix [64];
8362 
8363   if (nsp == NULL) return NULL;
8364   if (nsp->names [5] == NULL && nsp->names [4] != NULL) {
8365     ExtractSuffixFromInitials (nsp);
8366   }
8367   str [0] = '\0';
8368   StringNCpy_0 (first, nsp->names [1], sizeof (first));
8369   TrimSpacesAroundString (first);
8370   StringNCpy_0 (initials, nsp->names [4], sizeof (initials));
8371   StripPeriods (initials);
8372   TrimLeadingSpaces (initials);
8373   StringNCpy_0 (last, nsp->names [0], sizeof (last));
8374   TrimLeadingSpaces (last);
8375   StringNCpy_0 (middle, nsp->names [2], sizeof (middle));
8376   TrimLeadingSpaces (middle);
8377   if (StringCmp (initials, "al") == 0 &&
8378       StringCmp (last, "et") == 0 &&
8379       first [0] == '\0') {
8380     initials [0] = '\0';
8381     StringCpy (last, "et al.");
8382   }
8383   /*
8384   if (first [0] == '\0') {
8385     StringNCpy_0 (first, initials, sizeof (first));
8386     if (IS_ALPHA (first [0])) {
8387       if (first [1] == '-') {
8388         first [3] = '\0';
8389       } else {
8390         first [1] = '\0';
8391       }
8392     } else {
8393       first [0] = '\0';
8394     }
8395   }
8396   */
8397   frstinits [0] = '\0';
8398   FirstNameToInitials (first, frstinits, sizeof (frstinits) - 1);
8399   StripPeriods (first);
8400   TrimLeadingSpaces (first);
8401   if (first [0] != '\0') {
8402     StringCat (str, first);
8403   } else {
8404     /*
8405     StringCat (str, " ");
8406     */
8407   }
8408   StringCat (str, "\t");
8409   if (fixInitials) {
8410     j = 0;
8411     while (initials [j] != '\0' && TO_UPPER (initials [j]) == TO_UPPER (frstinits [j])) {
8412       j++;
8413     }
8414     if (initials [j] != '\0') {
8415       StringCat (str, initials + j);
8416     } else {
8417       /*
8418       StringCat (str, " ");
8419       */
8420     }
8421   } else if (initials [0] != '\0') {
8422     StringCat (str, initials);
8423   } else if (frstinits [0] != '\0') {
8424     StringCat (str, frstinits);
8425   }
8426   StringCat (str, "\t");
8427   StringCat (str, last);
8428   StringNCpy_0 (suffix, nsp->names [5], sizeof (suffix));
8429   StringCat (str, "\t");
8430   StripPeriods (suffix);
8431   TrimLeadingSpaces (suffix);
8432   if (suffix [0] != '\0') {
8433     StringCat (str, suffix);
8434   } else {
8435     /*
8436     StringCat (str, " ");
8437     */
8438   }
8439   StringCat (str, "\t");
8440   StringCat (str, middle);
8441   StringCat (str, "\n");
8442   return StringSave (str);
8443 }
8444 
/*
 * XtractTagListColumn
 *
 * Returns a newly allocated copy of column number col (0-based) of the
 * first line of source, where columns are separated by tabs and the line
 * ends at a newline or at the end of the string.  A column beyond the
 * end of the line yields an empty string.  Returns NULL when source is
 * NULL or empty, when col is negative, or when allocation fails.
 *
 * The terminator is not written explicitly; presumably MemNew returns
 * zero-filled memory.
 */
static CharPtr XtractTagListColumn (CharPtr source, Int2 col)

{
  CharPtr  cell;
  CharPtr  copy;
  size_t   width;

  if (source == NULL || source [0] == '\0' || col < 0) return NULL;

  /* step over col columns, stopping early at the end of the line */
  cell = source;
  for (; col > 0 && *cell != '\n' && *cell != '\0'; col--) {
    while (*cell != '\t' && *cell != '\n' && *cell != '\0') {
      cell++;
    }
    if (*cell == '\t') {
      cell++;
    }
  }

  /* measure the column */
  width = 0;
  while (cell [width] != '\t' && cell [width] != '\n' && cell [width] != '\0') {
    width++;
  }

  copy = (CharPtr) MemNew (width + 1);
  if (copy != NULL) {
    MemCpy (copy, cell, width);
  }
  return copy;
}
8481 
TabbedStringToNameStdPtr(CharPtr txt,Boolean fixInitials)8482 static NameStdPtr TabbedStringToNameStdPtr (CharPtr txt, Boolean fixInitials)
8483 
8484 {
8485   Char        ch;
8486   CharPtr     first;
8487   Char        initials [64];
8488   Int2        j;
8489   Int2        k;
8490   Char        last;
8491   Int2        len;
8492   NameStdPtr  nsp;
8493   Char        periods [128];
8494   CharPtr     str;
8495   Char        str1 [64];
8496   Char        suffix [80];
8497 
8498   if (txt == NULL) return NULL;
8499   nsp = NameStdNew ();
8500   if (nsp == NULL) return NULL;
8501   nsp->names [0] = XtractTagListColumn (txt, 2);
8502   TrimLeadingSpaces (nsp->names [0]);
8503   first = XtractTagListColumn (txt, 0);
8504   StripPeriods (first);
8505   nsp->names [1] = StringSave (first);
8506   TrimLeadingSpaces (nsp->names [1]);
8507   str1 [0] = '\0';
8508   if (fixInitials) {
8509     FirstNameToInitials (first, str1, sizeof (str1) - 1);
8510   }
8511   str = XtractTagListColumn (txt, 1);
8512   StringNCat (str1, str, sizeof (str1) - 1);
8513   MemFree (str);
8514   j = 0;
8515   k = 0;
8516   ch = str1 [j];
8517   while (ch != '\0') {
8518     if (ch != ' ') {
8519       initials [k] = ch;
8520       k++;
8521     }
8522     j++;
8523     ch = str1 [j];
8524   }
8525   initials [k] = '\0';
8526   periods [0] = '\0';
8527           j = 0;
8528           ch = initials [j];
8529           while (ch != '\0') {
8530             if (ch == ',') {
8531               initials [j] = '.';
8532             }
8533             j++;
8534             ch = initials [j];
8535           }
8536           str = StringStr (initials, ".ST.");
8537           if (str != NULL) {
8538             *(str + 2) = 't';
8539           }
8540   j = 0;
8541   k = 0;
8542   ch = initials [j];
8543   while (ch != '\0') {
8544     if (ch == '-') {
8545       periods [k] = ch;
8546       k++;
8547       j++;
8548       ch = initials [j];
8549     } else if (ch == '.') {
8550       j++;
8551       ch = initials [j];
8552             } else if (ch == ' ') {
8553               j++;
8554               ch = initials [j];
8555     } else {
8556       periods [k] = ch;
8557               last = ch;
8558       k++;
8559       j++;
8560       ch = initials [j];
8561               if (ch == '\0') {
8562                 if (! (IS_LOWER (last))) {
8563                   periods [k] = '.';
8564                   k++;
8565                 }
8566               /* } else if (ch == '.' && initials [j + 1] == '\0') { */
8567               } else if (! (IS_LOWER (ch))) {
8568                 periods [k] = '.';
8569                 k++;
8570               }
8571     }
8572   }
8573   if (k > 0 && periods [k - 1] != '.') {
8574     periods [k] = '.';
8575     k++;
8576   }
8577   periods [k] = '\0';
8578   nsp->names [4] = StringSave (periods);
8579   TrimLeadingSpaces (nsp->names [4]);
8580   str = XtractTagListColumn (txt, 3);
8581   StringNCpy_0 (str1, str, sizeof (str1));
8582   MemFree (str);
8583   j = 0;
8584   k = 0;
8585   ch = str1 [j];
8586   while (ch != '\0') {
8587     if (ch != ' ') {
8588       suffix [k] = ch;
8589       k++;
8590     }
8591     j++;
8592     ch = str1 [j];
8593   }
8594   suffix [k] = '\0';
8595   if (suffix [0] != '\0') {
8596     len = StringLen (suffix);
8597     if (len > 0 && suffix [len - 1] == '.') {
8598       suffix [len - 1] = '\0';
8599     }
8600     if (StringICmp (suffix, "1d") == 0) {
8601       StringCpy (suffix, "I");
8602     } else if (StringICmp (suffix, "1st") == 0) {
8603       StringCpy (suffix, "I");
8604     } else if (StringICmp (suffix, "2d") == 0) {
8605       StringCpy (suffix, "2nd");
8606     } else if (StringICmp (suffix, "3d") == 0) {
8607       StringCpy (suffix, "3rd");
8608     } else if (StringICmp (suffix, "Sr") == 0) {
8609       StringCpy (suffix, "Sr.");
8610     } else if (StringICmp (suffix, "Jr") == 0) {
8611       StringCpy (suffix, "Jr.");
8612     }
8613     /*
8614     len = StringLen (suffix);
8615     if (len > 0 && suffix [len - 1] != '.') {
8616       StringCat (suffix, ".");
8617     }
8618     */
8619     nsp->names [5] = StringSave (suffix);
8620     TrimLeadingSpaces (nsp->names [5]);
8621   }
8622   if (StringCmp (nsp->names [0], "et al") == 0) {
8623     nsp->names [0] = MemFree (nsp->names [0]);
8624     nsp->names [0] = StringSave ("et al.");
8625   }
8626   nsp->names [2] = XtractTagListColumn (txt, 4);
8627   TrimLeadingSpaces (nsp->names [2]);
8628   if (StringHasNoText (nsp->names [2])) {
8629     nsp->names [2] = MemFree (nsp->names [2]);
8630   }
8631   MemFree (first);
8632   return nsp;
8633 }
8634 
/*
 * CleanAffil
 *
 * Cleans the strings of an affiliation in place.  The affil field is
 * always cleaned; the remaining fields are cleaned only when choice is 2.
 * In that case a country of "U.S.A." or of "USA" in any other letter case
 * is rewritten as "USA", and for a country of "USA" the periods are
 * removed from the sub field.
 *
 * Returns afp, or NULL after freeing the structure when every string
 * field is NULL.  A NULL argument returns NULL.
 */
static AffilPtr CleanAffil (AffilPtr afp)

{
  Boolean  is_empty;

  if (afp == NULL) return NULL;

  CleanVisStringJunkAndCompress (&(afp->affil));
  if (afp->choice == 2) {
    CleanVisStringJunkAndCompress (&(afp->div));
    CleanVisStringJunkAndCompress (&(afp->city));
    CleanVisStringJunkAndCompress (&(afp->sub));
    CleanVisStringJunkAndCompress (&(afp->country));
    CleanVisStringJunkAndCompress (&(afp->street));
    CleanVisStringJunkAndCompress (&(afp->email));
    CleanVisStringJunkAndCompress (&(afp->fax));
    CleanVisStringJunkAndCompress (&(afp->phone));
    CleanVisStringJunkAndCompress (&(afp->postal_code));
    TrimSpacesSemicolonsAndCommas (afp->postal_code);

    /* "U.S.A." and any non-canonical casing of "USA" become "USA" */
    if (StringICmp (afp->country, "U.S.A.") == 0 ||
        (StringICmp (afp->country, "USA") == 0 &&
         StringCmp (afp->country, "USA") != 0)) {
      afp->country = MemFree (afp->country);
      afp->country = StringSave ("USA");
    }

    if (afp->sub != NULL && StringCmp (afp->country, "USA") == 0) {
      StripPeriods (afp->sub);
      TrimSpacesAroundString (afp->sub);
    }
  }

  is_empty = (Boolean) (afp->affil == NULL &&
                        afp->div == NULL &&
                        afp->city == NULL &&
                        afp->sub == NULL &&
                        afp->country == NULL &&
                        afp->street == NULL &&
                        afp->email == NULL &&
                        afp->fax == NULL &&
                        afp->phone == NULL &&
                        afp->postal_code == NULL);
  if (is_empty) {
    afp = MemFree (afp);
  }
  return afp;
}
8678 
NormalizeAuthors(AuthListPtr alp,Boolean fixInitials)8679 static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials)
8680 
8681 {
8682   AuthorPtr        ap;
8683   CharPtr          initials;
8684   size_t           len;
8685   ValNodePtr       names;
8686   ValNodePtr       next;
8687   NameStdPtr       nsp;
8688   PersonIdPtr      pid;
8689   ValNodePtr PNTR  prev;
8690   CharPtr          str;
8691   Boolean          upcaseinits;
8692   ValNodePtr       vnp;
8693   Boolean          zap;
8694 
8695   if (alp == NULL) return;
8696   alp->affil = CleanAffil (alp->affil);
8697 
8698   if (alp->choice == 2 || alp->choice == 3) {
8699     for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
8700       str = (CharPtr) vnp->data.ptrvalue;
8701       TrimSpacesAroundString (str);
8702       TrimSpacesAndJunkFromEnds (str, FALSE);
8703       Asn2gnbkCompressSpaces (str);
8704     }
8705   }
8706   if (alp->choice != 1) return;
8707 
8708   prev = &(alp->names);
8709   names = alp->names;
8710   while (names != NULL) {
8711     next = names->next;
8712     zap = FALSE;
8713     ap = names->data.ptrvalue;
8714     if (ap != NULL) {
8715       pid = ap->name;
8716       if (pid == NULL) {
8717         /* continue */
8718       } else if (pid->choice == 2) {
8719         nsp = pid->data;
8720         if (nsp != NULL /* && nsp->names [4] != NULL */) {
8721           upcaseinits = FALSE;
8722           initials = nsp->names [4];
8723           if (StringLen (initials) > 0) {
8724             if (IS_UPPER (initials [0])) {
8725               upcaseinits = TRUE;
8726             }
8727           }
8728           str = NameStdPtrToTabbedString (nsp, fixInitials);
8729           pid->data = NameStdFree (nsp);
8730           nsp = TabbedStringToNameStdPtr (str, fixInitials);
8731           if (upcaseinits) {
8732             initials = nsp->names [4];
8733             if (StringLen (initials) > 0) {
8734               if (IS_LOWER (initials [0])) {
8735                 initials [0] = TO_UPPER (initials [0]);
8736               }
8737             }
8738           }
8739           pid->data = nsp;
8740           MemFree (str);
8741           CleanVisString (&(nsp->names [0]));
8742           CleanVisString (&(nsp->names [1]));
8743           CleanVisString (&(nsp->names [2]));
8744           CleanVisString (&(nsp->names [3]));
8745           CleanVisString (&(nsp->names [4]));
8746           CleanVisString (&(nsp->names [5]));
8747           CleanVisString (&(nsp->names [6]));
8748           if (StringCmp (nsp->names [0], "et") == 0 &&
8749               (StringCmp (nsp->names [4], "al") == 0 ||
8750                StringCmp (nsp->names [4], "al.") == 0 ||
8751                StringCmp (nsp->names [4], "Al.") == 0) &&
8752               (StringHasNoText (nsp->names [1]) ||
8753                StringCmp (nsp->names [1], "a") == 0)) {
8754             nsp->names [4] = MemFree (nsp->names [4]);
8755             nsp->names [1] = MemFree (nsp->names [1]);
8756             nsp->names [0] = MemFree (nsp->names [0]);
8757             nsp->names [0] = StringSave ("et al.");
8758           }
8759           str = nsp->names [0];
8760           len = StringLen (str);
8761           if (len > 4 && StringHasNoText (nsp->names [5])) {
8762             if (StringCmp (str + len - 4, " Jr.") == 0 ||
8763                 StringCmp (str + len - 4, " Sr.") == 0) {
8764               nsp->names [5] = StringSave (str + len - 3);
8765               str [len - 4] = '\0';
8766               TrimSpacesAroundString (str);
8767             }
8768           }
8769           str = nsp->names [4];
8770           len = StringLen (str);
8771           if (len > 4 && StringHasNoText (nsp->names [5])) {
8772             if (StringCmp (str + len - 4, ".Jr.") == 0 ||
8773                 StringCmp (str + len - 4, ".Sr.") == 0) {
8774               nsp->names [5] = StringSave (str + len - 3);
8775               str [len - 3] = '\0';
8776               TrimSpacesAroundString (str);
8777             }
8778           }
8779           if (StringHasNoText (nsp->names [0]) &&
8780               StringHasNoText (nsp->names [1]) &&
8781               StringHasNoText (nsp->names [2]) &&
8782               StringHasNoText (nsp->names [3]) &&
8783               StringHasNoText (nsp->names [4]) &&
8784               StringHasNoText (nsp->names [5]) &&
8785               StringHasNoText (nsp->names [6])) {
8786             zap = TRUE;
8787           }
8788           /* last name is required, so zap if not present */
8789           if (StringHasNoText (nsp->names [0])) {
8790             zap = TRUE;
8791           }
8792         }
8793       } else if (pid->choice == 3 || pid->choice == 4 || pid->choice == 5) {
8794         TrimSpacesAroundString ((CharPtr) pid->data);
8795         if (StringHasNoText ((CharPtr) pid->data)) {
8796           zap = TRUE;
8797         }
8798       }
8799     }
8800     if (zap) {
8801       /* remove empty authors */
8802       *prev = names->next;
8803       names->next = NULL;
8804       AuthorFree (ap);
8805       ValNodeFree (names);
8806     } else {
8807       prev = &(names->next);
8808     }
8809     names = next;
8810   }
8811   /* if no remaining authors, put in default author for legal ASN.1 */
8812   if (alp->names == NULL) {
8813     names = ValNodeNew (NULL);
8814     if (names != NULL) {
8815       /*
8816       ap = AuthorNew ();
8817       if (ap != NULL) {
8818         pid = PersonIdNew ();
8819         if (pid != NULL) {
8820           pid->choice = 4;
8821           pid->data = (Pointer) StringSave ("?");
8822           ap->name = pid;
8823           names->choice = 1;
8824           names->data.ptrvalue = ap;
8825           alp->names = names;
8826         }
8827       }
8828       */
8829       names->choice = 3;
8830       names->data.ptrvalue = StringSave ("?");
8831       alp->names = names;
8832       alp->choice = 3;
8833     }
8834   }
8835 }
8836 
/*
 * StrStripSpaces
 *
 * Compresses blank runs in str, in place.  Characters are copied forward
 * one at a time; after a space, a tab or an opening parenthesis, any
 * spaces and tabs that follow are skipped.  If the next character is then
 * ')' or ',', the space or tab that was just copied is removed as well
 * (an opening parenthesis is always kept, so "(  )" becomes "()").
 *
 * A NULL str is ignored.  The result is never longer than the input.
 */
static void StrStripSpaces (
  CharPtr str
)

{
  CharPtr  src;
  CharPtr  dst;
  char     ch;

  if (str == NULL) return;

  src = str;
  dst = str;
  while ((ch = *src) != '\0') {
    *dst = ch;
    dst++;
    src++;
    if (ch != ' ' && ch != '\t' && ch != '(') continue;

    /* swallow the rest of the blank run */
    while (*src == ' ' || *src == '\t') {
      src++;
    }

    /* no blank before ')' or ',' - but never take back a '(' */
    if ((*src == ')' || *src == ',') && ch != '(') {
      dst--;
    }
  }
  *dst = '\0';
}
8862 
8863 /* from utilpub.c */
empty_citgen(CitGenPtr cit)8864 static Boolean empty_citgen(CitGenPtr  cit)
8865 {
8866     if (cit == NULL)
8867         return TRUE;
8868     if (cit->cit)
8869         return FALSE;
8870     if (cit->authors)
8871         return FALSE;
8872     if (cit->muid > 0)
8873         return FALSE;
8874     if (cit->journal)
8875         return FALSE;
8876     if (cit->volume)
8877         return FALSE;
8878     if (cit->issue)
8879         return FALSE;
8880     if (cit->pages)
8881         return FALSE;
8882     if (cit->date)
8883         return FALSE;
8884     if (cit->serial_number > 0)
8885         return FALSE;
8886     if (cit->title)
8887         return FALSE;
8888     if (cit->pmid > 0)
8889         return FALSE;
8890     return TRUE;
8891 }
8892 
/*
 * NormalizePubAuthors
 *
 * Runs NormalizeAuthors on every author list reachable from one Pub
 * element.  PUB_PMid / PUB_Muid elements and elements with a NULL data
 * pointer are ignored, as are Pub types not listed below.
 *
 *   vnp          Pub element to process (may be NULL)
 *   stripSerial  not used by this function
 *   fixInitials  passed through to NormalizeAuthors
 */
static void NormalizePubAuthors (ValNodePtr vnp, Boolean stripSerial, Boolean fixInitials)

{
  CitArtPtr   article;
  CitBookPtr  book;
  CitGenPtr   gen;
  CitPatPtr   patent;
  CitSubPtr   sub;

  if (vnp == NULL || vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return;
  if (vnp->data.ptrvalue == NULL) return;

  if (vnp->choice == PUB_Gen) {
    gen = (CitGenPtr) vnp->data.ptrvalue;
    NormalizeAuthors (gen->authors, fixInitials);
  } else if (vnp->choice == PUB_Sub) {
    sub = (CitSubPtr) vnp->data.ptrvalue;
    NormalizeAuthors (sub->authors, fixInitials);
  } else if (vnp->choice == PUB_Article) {
    article = (CitArtPtr) vnp->data.ptrvalue;
    NormalizeAuthors (article->authors, fixInitials);
    /* from values 2 and 3 carry a Cit-book with its own author list */
    if (article->from == 2 || article->from == 3) {
      book = (CitBookPtr) article->fromptr;
      if (book != NULL) {
        NormalizeAuthors (book->authors, fixInitials);
      }
    }
  } else if (vnp->choice == PUB_Book) {
    book = (CitBookPtr) vnp->data.ptrvalue;
    NormalizeAuthors (book->authors, fixInitials);
  } else if (vnp->choice == PUB_Man) {
    /* only touched for this othertype / let_type combination */
    book = (CitBookPtr) vnp->data.ptrvalue;
    if (book->othertype == 2 && book->let_type == 3) {
      NormalizeAuthors (book->authors, fixInitials);
    }
  } else if (vnp->choice == PUB_Patent) {
    patent = (CitPatPtr) vnp->data.ptrvalue;
    NormalizeAuthors (patent->authors, fixInitials);
    NormalizeAuthors (patent->applicants, fixInitials);
    NormalizeAuthors (patent->assignees, fixInitials);
  }
}
8944 
/*
 * NormalizeAPub
 *
 * Cleans up the non-author content of one Pub element, dispatching on
 * vnp->choice.  PUB_PMid / PUB_Muid elements and elements with a NULL
 * data pointer are left untouched.
 *
 *   vnp          Pub element to clean (may be NULL)
 *   stripSerial  when TRUE, the serial_number of a PUB_Gen is set to -1
 *   fixInitials  not used by this function
 *
 * Per type:
 *   PUB_Gen      capitalizes a leading "unpublished" in cit (dropping
 *                volume/issue/pages when there is no journal), trims cit,
 *                and compresses blanks in the title.
 *   PUB_Sub      moves the imprint's publisher and date up into the
 *                Cit-sub, frees an imprint left without a date, strips a
 *                leading "to the ... databases" phrase from a string
 *                affiliation, and runs CleanAffil on the affiliation.
 *   PUB_Article  compresses blanks in Cit_title_name titles.
 *   PUB_Man      cleans the imprint publisher's string affiliation.
 *   PUB_Patent   rewrites country "USA" as "US".
 *
 * Whatever imprint was located (Cit-sub, journal, book or manuscript) has
 * its string fields cleaned at the end.
 */
static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixInitials)

{
  AffilPtr     affil;
  AuthListPtr  alp;
  CitArtPtr    cap;
  CitBookPtr   cbp;
  CitGenPtr    cgp;
  CitJourPtr   cjp;
  CitPatPtr    cpp;
  CitSubPtr    csp;
  ImprintPtr   imp;
  CharPtr      str;
  CharPtr      tmp;
  ValNodePtr   ttl;

  if (vnp == NULL) return;
  if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return;
  if (vnp->data.ptrvalue == NULL) return;
  imp = NULL;
  switch (vnp->choice) {
    case PUB_Gen :
      cgp = (CitGenPtr) vnp->data.ptrvalue;
      if (stripSerial) {
        cgp->serial_number = -1; /* but does not remove if empty */
      }
      if (StringNICmp (cgp->cit, "unpublished", 11) == 0) {
        cgp->cit [0] = 'U';
        /* cgp->date = DateFree (cgp->date); */ /* remove date if unpublished */
        if (cgp->journal == NULL) {
          cgp->volume = MemFree (cgp->volume);
          cgp->issue = MemFree (cgp->issue);
          cgp->pages = MemFree (cgp->pages);
        }
      }
      TrimSpacesAroundString (cgp->cit);
      if (StringDoesHaveText (cgp->title)) {
        StrStripSpaces (cgp->title);
      }
      break;
    case PUB_Sub :
      csp = (CitSubPtr) vnp->data.ptrvalue;
      alp = csp->authors;
      imp = csp->imp;
      /* the publisher on the imprint becomes the author affiliation */
      if (alp != NULL && alp->affil == NULL && imp != NULL && imp->pub != NULL) {
        alp->affil = imp->pub;
        imp->pub = NULL;
      }
      /* likewise the imprint date becomes the submission date */
      if (csp->date == NULL && imp != NULL && imp->date != NULL) {
        csp->date = imp->date;
        imp->date = NULL;
      }
      if (imp != NULL && imp->date == NULL) {
        csp->imp = ImprintFree (csp->imp);
      }
      if (alp != NULL && alp->affil != NULL) {
        affil = alp->affil;
        if (affil->choice == 1) {
          str = affil->affil;
          /*
           * Strip a leading 34-character phrase of the form
           * "to the <17 characters> databases": 7 characters for
           * "to the ", 17 that are skipped without inspection, and 10 for
           * " databases".  The length test must come before touching
           * str + 24, otherwise a short string that merely begins with
           * "to the " would be read past its terminator.  A match needs
           * all 34 characters, so the test rejects nothing that could
           * have matched.
           */
          if (StringNICmp (str, "to the ", 7) == 0 && StringLen (str) >= 34) {
            if (StringNICmp (str + 24, " databases", 10) == 0) {
              str += 34;
              if (*str == '.') {
                str++;
              }
              tmp = StringSaveNoNull (TrimSpacesAroundString (str));
              affil->affil = MemFree (affil->affil);
              affil->affil = tmp;
            }
          }
        }
        alp->affil = CleanAffil (alp->affil);
      }
      /* re-read: the imprint may have been freed above */
      imp = csp->imp;
      break;
    case PUB_Article :
      cap = (CitArtPtr) vnp->data.ptrvalue;
      if (cap != NULL) {
        if (cap->from == 1) {
          cjp = (CitJourPtr) cap->fromptr;
          if (cjp != NULL) {
            imp = cjp->imp;
          }
        } else if (cap->from == 2 || cap->from == 3) {
          cbp = (CitBookPtr) cap->fromptr;
          if (cbp != NULL) {
            imp = cbp->imp;
          }
        }
        for (ttl = cap->title; ttl != NULL; ttl = ttl->next) {
          if (ttl->choice == Cit_title_name) {
            str = (CharPtr) ttl->data.ptrvalue;
            if (StringHasNoText (str)) continue;
            StrStripSpaces (str);
          }
        }
      }
      break;
    case PUB_Book :
      cbp = (CitBookPtr) vnp->data.ptrvalue;
      if (cbp != NULL) {
        imp = cbp->imp;
      }
      break;
    case PUB_Man :
      cbp = (CitBookPtr) vnp->data.ptrvalue;
      if (cbp != NULL) {
        imp = cbp->imp;
        if (imp != NULL) {
          affil = imp->pub;
          if (affil != NULL && affil->choice == 1) {
            CleanVisStringJunkAndCompress (&(affil->affil));
          }
        }
      }
      break;
    case PUB_Patent :
      cpp = (CitPatPtr) vnp->data.ptrvalue;
      if (cpp != NULL) {
        if (StringCmp (cpp->country, "USA") == 0) {
          cpp->country = MemFree (cpp->country);
          cpp->country = StringSave ("US");
        }
      }
      break;
    default :
      break;
  }
  /* common cleanup of whichever imprint was found above */
  if (imp != NULL) {
    CleanVisStringAndCompress (&(imp->volume));
    CleanVisStringAndCompress (&(imp->issue));
    CleanVisStringAndCompress (&(imp->pages));
    CleanVisStringAndCompress (&(imp->section));
    CleanVisStringAndCompress (&(imp->part_sup));
    CleanVisStringAndCompress (&(imp->language));
    CleanVisStringAndCompress (&(imp->part_supi));
  }
}
9083 
9084 //LCOV_EXCL_START
CleanUpPubdescAuthors(PubdescPtr pdp)9085 NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp)
9086 
9087 {
9088   Char             buf1 [121];
9089   Boolean          fixInitials = TRUE;
9090   Boolean          hasArt = FALSE;
9091   Boolean          hasUid = FALSE;
9092   ValNodePtr       next;
9093   ValNodePtr PNTR  prev;
9094   ValNodePtr       vnp;
9095 
9096   if (pdp == NULL) return;
9097   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
9098     if (vnp->choice == PUB_Muid || vnp->choice == PUB_PMid) {
9099       if (vnp->data.intvalue > 0) {
9100         hasUid = TRUE;
9101       }
9102     } else if (vnp->choice == PUB_Article) {
9103       hasArt = TRUE;
9104     }
9105   }
9106   if (hasArt && hasUid) {
9107     fixInitials = FALSE;
9108   }
9109   prev = &(pdp->pub);
9110   vnp = pdp->pub;
9111   while (vnp != NULL) {
9112     next = vnp->next;
9113     PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
9114     NormalizePubAuthors (vnp, TRUE, fixInitials);
9115     vnp = next;
9116   }
9117 }
9118 //LCOV_EXCL_STOP
9119 
/*
 * Sort rank for Pub elements, indexed by the element's choice value
 * (SortByPubType looks up choices 0 through 13 here).  Lower ranks sort
 * first.  Which PUB_ constant corresponds to which index is defined
 * outside this file section.
 */
static int pub_order [] = {
  /* choice  0 */  0,
  /* choice  1 */  3,
  /* choice  2 */  4,
  /* choice  3 */  13,
  /* choice  4 */  2,
  /* choice  5 */  5,
  /* choice  6 */  6,
  /* choice  7 */  7,
  /* choice  8 */  8,
  /* choice  9 */  9,
  /* choice 10 */  10,
  /* choice 11 */  11,
  /* choice 12 */  12,
  /* choice 13 */  1
};
9136 
SortByPubType(VoidPtr ptr1,VoidPtr ptr2)9137 static int LIBCALLBACK SortByPubType (VoidPtr ptr1, VoidPtr ptr2)
9138 
9139 {
9140   Uint1       chs1;
9141   Uint1       chs2;
9142   ValNodePtr  vnp1;
9143   ValNodePtr  vnp2;
9144 
9145   if (ptr1 == NULL || ptr2 == NULL) return 0;
9146   vnp1 = *((ValNodePtr PNTR) ptr1);
9147   vnp2 = *((ValNodePtr PNTR) ptr2);
9148   if (vnp1 == NULL || vnp2 == NULL) return 0;
9149   chs1 = (Uint1) vnp1->choice;
9150   chs2 = (Uint1) vnp2->choice;
9151   if (chs1 < 14 && chs2 < 14) {
9152     chs1 = pub_order [chs1];
9153     chs2 = pub_order [chs2];
9154   }
9155   if (chs1 > chs2) {
9156     return 1;
9157   } else if (chs1 < chs2) {
9158     return -1;
9159   }
9160   return 0;
9161 }
9162 
/*
 * NormalizePubdesc
 *
 * Cleans up one Pubdesc in place:
 *   - cleans the comment string;
 *   - sorts the Pub elements with SortByPubType;
 *   - removes zero-valued PUB_Muid elements when a non-zero muid exists;
 *   - normalizes every element (NormalizePubAuthors when doAuthors is
 *     TRUE, then NormalizeAPub);
 *   - adjusts the journal imprint prepub flag against pubstatus;
 *   - removes a PubMed article id that repeats the previous PubMed id;
 *   - removes empty PUB_Gen elements, except when a PUB_Gen is the only
 *     element of the Pubdesc;
 *   - keeps the PUB_PMid element and the article's PubMed id in step by
 *     adding whichever of the two is missing.
 *
 *   pdp          Pubdesc to clean (may be NULL)
 *   stripSerial  passed to the per-element normalizers
 *   doAuthors    when TRUE, author lists are normalized too
 *   publist      optional output list; for every element whose label
 *                changed, the old label is appended with choice 1 and the
 *                new label with choice 2.  May be NULL (CleanUpPubdescBody
 *                passes NULL), in which case nothing is recorded.
 *
 * Initials are fixed unless the Pubdesc holds both a PUB_Article and a
 * PUB_Muid / PUB_PMid element with a positive id.
 */
static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAuthors, ValNodePtr PNTR publist)

{
  ArticleIdPtr      aip;
  Int4              artpmid = 0;
  Char              buf1 [121];
  Char              buf2 [121];
  CitArtPtr         cap = NULL;
  CitJourPtr        cjp;
  Boolean           fixInitials = TRUE;
  Boolean           hasArt = FALSE;
  Boolean           hasUid = FALSE;
  ImprintPtr        imp;
  Int4              lastartpmid = 0;
  Int4              muid = 0;
  ValNodePtr        next;
  ArticleIdPtr      nextaip;
  Int4              pmid = 0;
  ValNodePtr PNTR   prev;
  ArticleIdPtr PNTR prevaip;
  ValNodePtr        vnp;

  if (pdp == NULL) return;
  CleanVisString (&(pdp->comment));
  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == PUB_Muid) {
      if (vnp->data.intvalue > 0) {
        muid = vnp->data.intvalue;
      }
    }
    if (vnp->choice == PUB_Muid || vnp->choice == PUB_PMid) {
      if (vnp->data.intvalue > 0) {
        hasUid = TRUE;
      }
    } else if (vnp->choice == PUB_Article) {
      hasArt = TRUE;
    }
  }
  if (hasArt && hasUid) {
    fixInitials = FALSE;
  }
  if (pdp->pub != NULL) {
    pdp->pub = ValNodeSort (pdp->pub, SortByPubType);
  }

  /* remove zero muid where there is also a non-zero muid */
  prev = &(pdp->pub);
  vnp = pdp->pub;
  while (vnp != NULL) {
    next = vnp->next;
    if (vnp->choice == PUB_Muid && vnp->data.intvalue == 0 && muid != 0) {
      *prev = vnp->next;
      vnp->next = NULL;
      PubFree (vnp);
    } else {
      prev = &(vnp->next);
    }
    vnp = next;
  }

  prev = &(pdp->pub);
  vnp = pdp->pub;
  if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
    /*
     * A lone PUB_Gen gets the same cleanup as any other PUB_Gen, through
     * the same helpers as the loop below, which also tolerate an element
     * whose data pointer is NULL.  It is never removed, even when empty.
     */
    buf1 [0] = '\0';
    PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
    if (doAuthors) {
      NormalizePubAuthors (vnp, stripSerial, fixInitials);
    }
    NormalizeAPub (vnp, stripSerial, fixInitials);
    buf2 [0] = '\0';
    PubLabelUnique (vnp, buf2, sizeof (buf2) - 1, OM_LABEL_CONTENT, TRUE);
    if (publist != NULL && StringCmp (buf1, buf2) != 0) {
      ValNodeCopyStr (publist, 1, buf1);
      ValNodeCopyStr (publist, 2, buf2);
    }
    return; /* but does not remove if empty and only element of Pub */
  }
  while (vnp != NULL) {
    next = vnp->next;
    buf1 [0] = '\0';
    PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
    if (doAuthors) {
      NormalizePubAuthors (vnp, stripSerial, fixInitials);
    }
    NormalizeAPub (vnp, stripSerial, fixInitials);
    if (vnp->choice == PUB_Article) {
      cap = (CitArtPtr) vnp->data.ptrvalue;
      if (cap != NULL && cap->from == 1) {
        cjp = (CitJourPtr) cap->fromptr;
        if (cjp != NULL) {
          imp = cjp->imp;
          if (imp != NULL) {
            /* ahead-of-print without volume or pages: set prepub to 2 */
            if (imp->pubstatus == PUBSTATUS_aheadofprint && imp->prepub != 2) {
              if (StringHasNoText (imp->volume) || StringHasNoText (imp->pages)) {
                imp->prepub = 2;
              }
            }
            /* ahead-of-print with both volume and pages: clear prepub */
            if (imp->pubstatus == PUBSTATUS_aheadofprint && imp->prepub == 2) {
              if (StringDoesHaveText (imp->volume) && StringDoesHaveText (imp->pages)) {
                imp->prepub = 0;
              }
            }
            if (imp->pubstatus == PUBSTATUS_epublish && imp->prepub == 2) {
              imp->prepub = 0;
            }
          }
        }
      }
      if (cap != NULL) {
        /* drop a PubMed id that repeats the previous PubMed id seen */
        aip = cap->ids;
        prevaip = (ArticleIdPtr PNTR) &(cap->ids);
        lastartpmid = 0;
        while (aip != NULL) {
          nextaip = aip->next;
          if (aip->choice == ARTICLEID_PUBMED) {
            artpmid = aip->data.intvalue;
            if (lastartpmid != 0 && lastartpmid == artpmid) {
              aip->next = NULL;
              *prevaip = nextaip;
              ArticleIdFree (aip);
            } else {
              prevaip = (ArticleIdPtr PNTR) &(aip->next);
            }
            lastartpmid = artpmid;
          } else {
            prevaip = (ArticleIdPtr PNTR) &(aip->next);
          }
          aip = nextaip;
        }
      }
    } else if (vnp->choice == PUB_PMid) {
      pmid = vnp->data.intvalue;
    }
    if (vnp->choice == PUB_Gen && empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
      *prev = vnp->next;
      vnp->next = NULL;
      PubFree (vnp);
    } else {
      prev = &(vnp->next);
      buf2 [0] = '\0';
      PubLabelUnique (vnp, buf2, sizeof (buf2) - 1, OM_LABEL_CONTENT, TRUE);
      if (publist != NULL && StringCmp (buf1, buf2) != 0) {
        ValNodeCopyStr (publist, 1, buf1);
        ValNodeCopyStr (publist, 2, buf2);
      }
    }
    vnp = next;
  }
  /* make the PUB_PMid element and the article's PubMed id agree */
  if (pmid == 0 && artpmid > 0) {
    ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid);
  } else if (pmid > 0 && artpmid == 0 && cap != NULL) {
    ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, pmid);
  }
}
9334 
9335 //LCOV_EXCL_START
CleanUpPubdescBody(PubdescPtr pdp,Boolean stripSerial)9336 NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial)
9337 
9338 {
9339   if (pdp == NULL) return;
9340   NormalizePubdesc (pdp, stripSerial, FALSE, NULL);
9341 }
9342 //LCOV_EXCL_STOP
9343 
KeywordAlreadyInList(ValNodePtr head,CharPtr kwd)9344 static Boolean KeywordAlreadyInList (ValNodePtr head, CharPtr kwd)
9345 
9346 {
9347   ValNodePtr  vnp;
9348 
9349   if (head == NULL || kwd == NULL) return FALSE;
9350 
9351   for (vnp = head; vnp != NULL; vnp = vnp->next) {
9352     if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
9353   }
9354 
9355   return FALSE;
9356 }
9357 
CopyGeneXrefToGeneFeat(GeneRefPtr grp,GeneRefPtr grx)9358 static Boolean CopyGeneXrefToGeneFeat (GeneRefPtr grp, GeneRefPtr grx)
9359 
9360 {
9361   if (grp == NULL || grx == NULL) return FALSE;
9362   if (grx->db != NULL) {
9363     ValNodeLink (&(grp->db), grx->db);
9364     grx->db = NULL;
9365   }
9366   if (grx->locus == NULL && grx->allele == NULL &&
9367       grx->desc == NULL && grx->maploc == NULL &&
9368       grx->locus_tag == NULL && grx->db == NULL &&
9369       grx->syn == NULL) return TRUE;
9370   return FALSE;
9371 }
9372 
/*
 * HandleXrefOnGene
 *
 * For a gene feature, folds each gene xref into the feature's own
 * GeneRef via CopyGeneXrefToGeneFeat and unlinks and frees every xref
 * for which that call returns TRUE.  Other xrefs are left in place.
 * Does nothing unless sfp is a SEQFEAT_GENE feature with a GeneRef.
 */
static void HandleXrefOnGene (SeqFeatPtr sfp)

{
  Boolean              drop;
  GeneRefPtr           grp;
  GeneRefPtr           grx;
  SeqFeatXrefPtr       next;
  SeqFeatXrefPtr PNTR  prev;
  SeqFeatXrefPtr       xref;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
  grp = (GeneRefPtr) sfp->data.value.ptrvalue;
  if (grp == NULL) return;

  /* prev always addresses the link that points at the current xref */
  prev = &(sfp->xref);
  for (xref = sfp->xref; xref != NULL; xref = next) {
    next = xref->next;
    drop = FALSE;
    if (xref->data.choice == SEQFEAT_GENE) {
      grx = (GeneRefPtr) xref->data.value.ptrvalue;
      if (CopyGeneXrefToGeneFeat (grp, grx)) {
        drop = TRUE;
      }
    }
    if (drop) {
      *(prev) = next;
      xref->next = NULL;
      SeqFeatXrefFree (xref);
    } else {
      prev = &(xref->next);
    }
  }
}
9404 
/*
 * CopyProtXrefToProtFeat
 *
 * Merges the content of a protein xref (prx) into the ProtRef of the
 * protein feature (prp):
 *   db        the whole list is moved onto prp.
 *   name, ec  each string not already present in prp (per
 *             KeywordAlreadyInList) is copied to prp and its node is
 *             removed from prx; strings already present stay in prx.
 *   desc      taken over when prp has none, otherwise appended after
 *             "; " when the two differ.
 *   activity  each string not already present in prp is copied; every
 *             node is removed from prx either way.
 *
 * Does nothing when either argument is NULL.
 *
 * Ownership: ValNodeCopyStr gives prp its own copy of each string and
 * ValNodeFree releases only the node, so the string of every node removed
 * from prx is freed here explicitly.
 */
static void CopyProtXrefToProtFeat (ProtRefPtr prp, ProtRefPtr prx)

{
  ValNodePtr       curr;
  size_t           len;
  ValNodePtr       next;
  ValNodePtr PNTR  prev;
  CharPtr          str;

  if (prp == NULL || prx == NULL) return;

  if (prx->db != NULL) {
    ValNodeLink (&(prp->db), prx->db);
    prx->db = NULL;
  }

  /* names: move over the ones prp does not have yet */
  prev = &(prx->name);
  curr = prx->name;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->name, str)) {
      ValNodeCopyStr (&(prp->name), 0, str);
      *(prev) = next;
      curr->next = NULL;
      /* prp holds a copy; release the original along with its node */
      curr->data.ptrvalue = MemFree (str);
      ValNodeFree (curr);
    } else {
      prev = &(curr->next);
    }
    curr = next;
  }

  if (prp->desc == NULL) {
    prp->desc = prx->desc;
    prx->desc = NULL;
  } else if (prx->desc != NULL) {
    if (StringCmp (prx->desc, prp->desc) != 0) {
      len = StringLen (prp->desc) + StringLen (prx->desc) + 6;
      str = MemNew (len);
      if (str != NULL) {
        StringCpy (str, prp->desc);
        StringCat (str, "; ");
        StringCat (str, prx->desc);
        prp->desc = MemFree (prp->desc);
        prp->desc = str;
      }
    }
  }

  /* EC numbers: same treatment as names */
  prev = &(prx->ec);
  curr = prx->ec;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->ec, str)) {
      ValNodeCopyStr (&(prp->ec), 0, str);
      *(prev) = next;
      curr->next = NULL;
      curr->data.ptrvalue = MemFree (str);
      ValNodeFree (curr);
    } else {
      prev = &(curr->next);
    }
    curr = next;
  }

  /* activities: copy the new ones, then drop every node from prx */
  prev = &(prx->activity);
  curr = prx->activity;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->activity, str)) {
      ValNodeCopyStr (&(prp->activity), 0, str);
    }
    *(prev) = next;
    curr->next = NULL;
    curr->data.ptrvalue = MemFree (str);
    ValNodeFree (curr);
    curr = next;
  }
}
9488 
InGpsGenomic(SeqFeatPtr sfp)9489 static Boolean InGpsGenomic (SeqFeatPtr sfp)
9490 
9491 {
9492   BioseqPtr     bsp;
9493   BioseqSetPtr  bssp;
9494 
9495   if (sfp == NULL) return FALSE;
9496   bsp = BioseqFindFromSeqLoc (sfp->location);
9497   if (bsp == NULL) return FALSE;
9498   if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
9499     bssp = (BioseqSetPtr) bsp->idx.parentptr;
9500     while (bssp != NULL) {
9501       if (bssp->_class == BioseqseqSet_class_nuc_prot) return FALSE;
9502       if (bssp->_class == BioseqseqSet_class_gen_prod_set) return TRUE;
9503       if (bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
9504       bssp = (BioseqSetPtr) bssp->idx.parentptr;
9505     }
9506   }
9507   return FALSE;
9508 }
9509 
/*
 * HandleXrefOnCDS
 *
 * For a feature with a product, merges every protein xref into the
 * ProtRef of the best protein feature on that product
 * (CopyProtXrefToProtFeat) and then unlinks and frees the xref.
 * Nothing happens when the feature has no product, when InGpsGenomic
 * returns TRUE for it, or when no protein feature / ProtRef is found.
 */
static void HandleXrefOnCDS (SeqFeatPtr sfp)

{
  SeqFeatXrefPtr       next;
  SeqFeatXrefPtr PNTR  prev;
  SeqFeatPtr           prot;
  ProtRefPtr           prp;
  ProtRefPtr           prx;
  SeqFeatXrefPtr       xref;

  if (sfp == NULL || sfp->product == NULL) return;
  if (InGpsGenomic (sfp)) return;

  prot = GetBestProteinFeatureUnindexed (sfp->product);
  if (prot == NULL) return;
  prp = (ProtRefPtr) prot->data.value.ptrvalue;
  if (prp == NULL) return;

  /* prev always addresses the link that points at the current xref */
  prev = &(sfp->xref);
  for (xref = sfp->xref; xref != NULL; xref = next) {
    next = xref->next;
    if (xref->data.choice != SEQFEAT_PROT) {
      prev = &(xref->next);
      continue;
    }
    prx = (ProtRefPtr) xref->data.value.ptrvalue;
    CopyProtXrefToProtFeat (prp, prx);
    *(prev) = next;
    xref->next = NULL;
    SeqFeatXrefFree (xref);
  }
}
9545 
/*
 * CleanUserStrings
 *
 * User-field callback that tidies the strings of one field:
 *   - a label string that has text is passed to CleanVisString;
 *   - for choice 1 the data pointer is treated as a single string and,
 *     when it has text, passed to CleanVisStringAndCompress;
 *   - for choice 7 the data pointer is treated as an array of ufp->num
 *     strings, each trimmed with TrimSpacesSemicolonsAndCommas and then
 *     compressed with Asn2gnbkCompressSpaces.
 * userdata is not used.
 */
static void CleanUserStrings (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  Int4          idx;
  ObjectIdPtr   label;
  CharPtr PNTR  strs;

  label = ufp->label;
  if (label != NULL && label->str != NULL && ! StringHasNoText (label->str)) {
    CleanVisString (&(label->str));
  }

  switch (ufp->choice) {
    case 1 :
      if (! StringHasNoText ((CharPtr) ufp->data.ptrvalue)) {
        CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
      }
      break;
    case 7 :
      strs = (CharPtr PNTR) ufp->data.ptrvalue;
      if (strs == NULL) break;
      for (idx = 0; idx < ufp->num; idx++) {
        TrimSpacesSemicolonsAndCommas (strs [idx]);
        Asn2gnbkCompressSpaces (strs [idx]);
      }
      break;
    default :
      break;
  }
}
9576 
/*
 * CleanUserFields
 *
 * User-field callback: cleans the field's own label string when it has
 * text, then hands the field to VisitUserFieldsInUfp with
 * CleanUserStrings as the callback, passing userdata through unchanged.
 */
static void CleanUserFields (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  ObjectIdPtr  label;

  label = ufp->label;
  if (label != NULL && label->str != NULL && ! StringHasNoText (label->str)) {
    CleanVisString (&(label->str));
  }

  VisitUserFieldsInUfp (ufp, userdata, CleanUserStrings);
}
9593 
9594 //LCOV_EXCL_START
UserFieldSort(UserFieldPtr list,int (LIBCALLBACK * compar)PROTO ((VoidPtr,VoidPtr)))9595 NLM_EXTERN UserFieldPtr LIBCALL UserFieldSort (UserFieldPtr list, int (LIBCALLBACK *compar ) PROTO((VoidPtr, VoidPtr)))
9596 
9597 {
9598   Int4          count, i;
9599   UserFieldPtr  PNTR head;
9600   UserFieldPtr  tmp;
9601 
9602   if (list == NULL) return NULL;
9603 
9604   count = 0;
9605   for (tmp = list; tmp != NULL; tmp = tmp->next) {
9606     count++;
9607   }
9608 
9609   head = (UserFieldPtr *) MemNew (((size_t) count + 1) * sizeof (UserFieldPtr));
9610 
9611   for (tmp = list, i = 0; tmp != NULL && i < count; i++) {
9612     head [i] = tmp;
9613     tmp = tmp->next;
9614   }
9615 
9616   HeapSort (head, (size_t) count, sizeof (UserFieldPtr), compar);
9617 
9618   for (i = 0; i < count; i++) {
9619     tmp = head [i];
9620     tmp->next = head [i + 1];
9621   }
9622   list = head [0];
9623 
9624   MemFree (head);
9625 
9626   return list;
9627 }
9628 //LCOV_EXCL_STOP
9629 
9630 /*
9631 static CharPtr barcodeOrder [] = {
9632   "",
9633   "StructuredCommentPrefix",
9634   "Barcode Index Number",
9635   "Order Assignment",
9636   "iBOL Working Group",
9637   "iBOL Release Status",
9638   "Tentative Name",
9639   "StructuredCommentSuffix",
9640   NULL
9641 };
9642 
9643 static Int2 GetBarcodeOrder (CharPtr str)
9644 
9645 {
9646   Int2  i;
9647 
9648   if (StringHasNoText (str)) return 0;
9649 
9650   for (i = 1; barcodeOrder [i] != NULL; i++) {
9651     if (StringCmp (str, barcodeOrder [i]) == 0) return i;
9652   }
9653 
9654   return 0;
9655 }
9656 
9657 static int LIBCALLBACK ReorderBarcodeFields (VoidPtr ptr1, VoidPtr ptr2)
9658 
9659 {
9660   Int2          idx1, idx2;
9661   ObjectIdPtr   lbl1, lbl2;
9662   CharPtr       str1, str2;
9663   UserFieldPtr  ufp1, ufp2;
9664 
9665   if (ptr1 == NULL || ptr2 == NULL) return 0;
9666 
9667   ufp1 = *((UserFieldPtr PNTR) ptr1);
9668   ufp2 = *((UserFieldPtr PNTR) ptr2);
9669   if (ufp1 == NULL || ufp2 == NULL) return 0;
9670 
9671   lbl1 = (ObjectIdPtr) ufp1->label;
9672   lbl2 = (ObjectIdPtr) ufp2->label;
9673   if (lbl1 == NULL || lbl2 == NULL) return 0;
9674 
9675   str1 = (CharPtr) lbl1->str;
9676   str2 = (CharPtr) lbl2->str;
9677   if (str1 == NULL || str2 == NULL) return 0;
9678 
9679   idx1 = GetBarcodeOrder (str1);
9680   idx2 = GetBarcodeOrder (str2);
9681 
9682   if (idx1 > idx2) return 1;
9683   if (idx1 < idx2) return -1;
9684 
9685   return 0;
9686 }
9687 */
9688 
CleanStructuredComment(UserObjectPtr uop)9689 NLM_EXTERN void CleanStructuredComment (
9690   UserObjectPtr uop
9691 )
9692 
9693 {
9694   Boolean      genome_assembly_data = FALSE, ibol_data = FALSE;
9695   UserFieldPtr ufp;
9696   CharPtr      str, core, new_str;
9697 
9698   if (uop == NULL || uop->type == NULL
9699       || StringCmp (uop->type->str, "StructuredComment") != 0) {
9700     return;
9701   }
9702 
9703   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
9704     if (ufp->label != NULL
9705         && ufp->choice == 1
9706         && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
9707       if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
9708         core = StructuredCommentDbnameFromString(str);
9709         new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
9710         sprintf (new_str, "##%s-START##", core);
9711         str = MemFree (str);
9712         ufp->data.ptrvalue = new_str;
9713         if (StringCmp (core, "Genome-Assembly-Data") == 0) {
9714           genome_assembly_data = TRUE;
9715         } else if (StringCmp (core, "International Barcode of Life (iBOL)Data") == 0) {
9716           ibol_data = TRUE;
9717         }
9718         core = MemFree (core);
9719       } else if (StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
9720         core = StructuredCommentDbnameFromString(str);
9721         new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
9722         sprintf (new_str, "##%s-END##", core);
9723         str = MemFree (str);
9724         ufp->data.ptrvalue = new_str;
9725         if (StringCmp (core, "Genome-Assembly-Data") == 0) {
9726           genome_assembly_data = TRUE;
9727         } else if (StringCmp (core, "International Barcode of Life (iBOL)Data") == 0) {
9728           ibol_data = TRUE;
9729         }
9730         core = MemFree (core);
9731       }
9732     }
9733   }
9734 
9735   if (genome_assembly_data) {
9736     for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
9737       if (ufp->label != NULL
9738           && ufp->choice == 1
9739           && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
9740         if (StringCmp (ufp->label->str, "Finishing Goal") == 0 ||
9741             StringCmp (ufp->label->str, "Current Finishing Status") == 0) {
9742           if (StringCmp (str, "High Quality Draft") == 0) {
9743             ufp->data.ptrvalue = StringSave ("High-Quality Draft");
9744             str = MemFree (str);
9745           } else if (StringCmp (str, "Improved High Quality Draft") == 0) {
9746             ufp->data.ptrvalue = StringSave ("Improved High-Quality Draft");
9747             str = MemFree (str);
9748           } else if (StringCmp (str, "Annotation Directed") == 0) {
9749             ufp->data.ptrvalue = StringSave ("Annotation-Directed Improvement");
9750             str = MemFree (str);
9751           } else if (StringCmp (str, "Non-contiguous Finished") == 0) {
9752             ufp->data.ptrvalue = StringSave ("Noncontiguous Finished");
9753             str = MemFree (str);
9754           }
9755         } else if (StringCmp(ufp->label->str, "Assembly Date") == 0) {
9756           str = (CharPtr) ufp->data.ptrvalue;
9757           ReformatAssemblyDate(&str);
9758           ufp->data.ptrvalue = str;
9759         }
9760       }
9761     }
9762   }
9763 
9764   if (ibol_data) {
9765     /*
9766     uop->data = UserFieldSort (uop->data, ReorderBarcodeFields);
9767     */
9768     ReorderStructuredCommentFields (uop);
9769   }
9770 }
9771 
9772 
9773 //LCOV_EXCL_START
9774 // change made as a result of SQD-2399, which will not be implemented for the C++ Toolkit
9775 // going forward. bad data was generated internally, production process has been fixed.
/*
 * CleanRefGeneTrackingUserObject
 *
 * Only acts on a user object whose type string is "RefGeneTracking".
 * Finds the first field labelled "Assembly"; if that field has choice 11
 * and its first child does NOT itself have choice 11, the existing child
 * chain is wrapped in one newly allocated choice-11 field (label id 0) and
 * that wrapper becomes the sole child of "Assembly".
 *
 * Nothing is changed when uop is NULL, the type does not match, there is
 * no "Assembly" field, the field is empty, the children are already
 * choice 11, or an allocation fails.
 */
static void CleanRefGeneTrackingUserObject (
  UserObjectPtr uop
)

{
  UserFieldPtr  asmbly = NULL, entry, tmp, ufp;
  ObjectIdPtr   oip;

  if (uop == NULL) return;
  oip = uop->type;
  if (oip == NULL) return;
  if (StringCmp (oip->str, "RefGeneTracking") != 0) return;

  /* locate the first field labelled "Assembly" */
  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    if (oip == NULL) continue;
    if (StringCmp (oip->str, "Assembly") != 0) continue;
    asmbly = ufp;
    break;
  }

  if (asmbly == NULL || asmbly->choice != 11) return;
  tmp = asmbly->data.ptrvalue;
  /* already has the extra level of nesting (or is empty) - nothing to do */
  if (tmp == NULL || tmp->choice == 11) return;

  entry = UserFieldNew ();
  if (entry == NULL) return;
  oip = ObjectIdNew ();
  if (oip == NULL) {
    /* do not leak the wrapper when its label cannot be allocated */
    UserFieldFree (entry);
    return;
  }

  /* the wrapper takes over the old child chain */
  entry->data.ptrvalue = (Pointer) tmp;
  entry->choice = 11;
  entry->label = oip;
  oip->id = 0;

  asmbly->data.ptrvalue = (Pointer) entry;
  asmbly->choice = 11;
}
9814 //LCOV_EXCL_STOP
9815 
/*
 * CleanUserObject
 *
 * Cleanup entry point for a single user object:
 *   1. runs CleanVisString on the type string when it has text,
 *   2. visits every user field with CleanUserFields (userdata is passed
 *      through to the visitor unchanged),
 *   3. applies CleanStructuredComment and CleanRefGeneTrackingUserObject.
 *
 * A NULL uop is ignored.
 */
static void CleanUserObject (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  oip;

  /* guard added: uop->type was previously dereferenced unconditionally */
  if (uop == NULL) return;

  oip = uop->type;
  if (oip != NULL && oip->str != NULL) {
    if (! StringHasNoText (oip->str)) {
      CleanVisString (&(oip->str));
    }
  }
  VisitUserFieldsInUop (uop, userdata, CleanUserFields);
  CleanStructuredComment (uop);
  CleanRefGeneTrackingUserObject (uop);
}
9834 
/*
 * Site names, NULL-terminated.  The position of a name in this array is
 * significant: CleanupFeatureStrings stores the matching index into
 * sfp->data.value.intvalue for SEQFEAT_SITE features whose comment starts
 * with the name (compared case-insensitively, also with '-' replaced by
 * ' ').  Do not reorder or insert entries in the middle.
 */
static CharPtr bsecSiteList [] = {
  "", "active", "binding", "cleavage", "inhibit", "modifi",
  "glycosylation", "myristoylation", "mutagenized", "metal-binding",
  "phosphorylation", "acetylation", "amidation", "methylation",
  "hydroxylation", "sulfatation", "oxidative-deamination",
  "pyrrolidone-carboxylic-acid", "gamma-carboxyglutamic-acid",
  "blocked", "lipid-binding", "np-binding", "DNA-binding",
  "signal-peptide", "transit-peptide", "transmembrane-region",
  "nitrosylation", NULL
};
9845 
/*
 * NULL-terminated list of strings that InformativeString treats as
 * carrying no information (whole-string, case-insensitive match).
 */
static CharPtr uninfStrings [] = {
  "signal",
  "transit",
  "peptide",
  "signal peptide",
  "signal-peptide",
  "signal_peptide",
  "transit peptide",
  "transit-peptide",
  "transit_peptide",
  "unnamed",
  "unknown",
  "putative",
  NULL
};
9861 
InformativeString(CharPtr str)9862 static Boolean InformativeString (CharPtr str)
9863 
9864 {
9865   Int2  i;
9866 
9867   if (StringHasNoText (str)) return FALSE;
9868 
9869   for (i = 0; uninfStrings [i] != NULL; i++) {
9870     if (StringICmp (str, uninfStrings [i]) == 0) return FALSE;
9871   }
9872 
9873   return TRUE;
9874 }
9875 
/*
 * CleanUpExceptText
 *
 * Rewrites old spellings of exception phrases in sfp->except_text to their
 * current forms.  The text is split on commas, each piece is trimmed, any
 * piece that exactly equals an old phrase is replaced, empty pieces are
 * dropped, and the pieces are re-joined with ", ".
 *
 * Nothing is done when sfp or its except_text is NULL, or when none of the
 * old phrases occurs anywhere in the text.
 *
 * The text is tokenised on a private copy and the intermediate list is
 * released on every path, so an allocation failure leaves except_text
 * exactly as it was (previously it was left truncated at the first comma
 * and the token list leaked).
 */
static void CleanUpExceptText (SeqFeatPtr sfp)

{
  /* { old phrase, replacement } */
  static CharPtr  fixes [][2] = {
    { "ribosome slippage",               "ribosomal slippage" },
    { "trans splicing",                  "trans-splicing" },
    { "alternate processing",            "alternative processing" },
    { "non-consensus splice site",       "nonconsensus splice site" },
    { "adjusted for low quality genome", "adjusted for low-quality genome" }
  };
  size_t      numfixes = sizeof (fixes) / sizeof (fixes [0]);
  Boolean     found = FALSE;
  ValNodePtr  head = NULL, vnp;
  size_t      i, len;
  CharPtr     prefix, ptr, str, tmp, work;

  if (sfp == NULL || sfp->except_text == NULL) return;

  /* cheap pre-check: bail out unless some old phrase appears at all */
  for (i = 0; i < numfixes && ! found; i++) {
    if (StringStr (sfp->except_text, fixes [i][0]) != NULL) {
      found = TRUE;
    }
  }
  if (! found) return;

  /* split a scratch copy so the original survives a later failure */
  work = StringSave (sfp->except_text);
  if (work == NULL) return;

  tmp = work;
  while (! StringHasNoText (tmp)) {
    ptr = StringChr (tmp, ',');
    if (ptr != NULL) {
      *ptr = '\0';
      ptr++;
    }
    TrimSpacesAroundString (tmp);
    ValNodeCopyStr (&head, 0, tmp);
    tmp = ptr;
  }
  work = MemFree (work);

  /* replace pieces that are exactly an old phrase */
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    for (i = 0; i < numfixes; i++) {
      if (StringCmp (tmp, fixes [i][0]) != 0) continue;
      vnp->data.ptrvalue = MemFree (tmp);
      vnp->data.ptrvalue = StringSave (fixes [i][1]);
      break;
    }
  }

  /* room for every piece plus a ", " separator each */
  len = 0;
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    len += StringLen (tmp) + 2;
  }

  str = (CharPtr) MemNew (len + 2);
  if (str == NULL) {
    ValNodeFreeData (head);
    return;
  }

  prefix = "";
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    StringCat (str, prefix);
    StringCat (str, tmp);
    prefix = ", ";
  }

  sfp->except_text = MemFree (sfp->except_text);
  sfp->except_text = str;

  ValNodeFreeData (head);
}
9948 
ExpandGeneSynCom(ValNodePtr headsyn)9949 static Boolean ExpandGeneSynCom (ValNodePtr headsyn)
9950 
9951 {
9952   ValNodePtr  lastsyn;
9953   ValNodePtr  newsyn;
9954   ValNodePtr  nextsyn;
9955   CharPtr     ptr;
9956   CharPtr     str;
9957   CharPtr     tmp;
9958 
9959   str = (CharPtr) headsyn->data.ptrvalue;
9960   if (StringHasNoText (str)) return TRUE;
9961   if (StringChr (str, ',') == NULL) return FALSE;
9962 
9963   nextsyn = headsyn->next;
9964   lastsyn = headsyn;
9965   tmp = StringSave ((CharPtr) headsyn->data.ptrvalue);
9966   str = tmp;
9967 
9968   while (! StringHasNoText (str)) {
9969     ptr = StringChr (str, ',');
9970     if (ptr != NULL) {
9971       *ptr = '\0';
9972       ptr++;
9973     }
9974     TrimSpacesAroundString (str);
9975     newsyn = ValNodeNew (NULL);
9976     if (newsyn != NULL) {
9977       newsyn->data.ptrvalue = StringSave (str);
9978       newsyn->next = nextsyn;
9979       lastsyn->next = newsyn;
9980       lastsyn = newsyn;
9981     }
9982     str = ptr;
9983   }
9984 
9985   MemFree (tmp);
9986   return TRUE;
9987 }
9988 
ExpandGeneSynSem(ValNodePtr headsyn)9989 static Boolean ExpandGeneSynSem (ValNodePtr headsyn)
9990 
9991 {
9992   ValNodePtr  lastsyn;
9993   ValNodePtr  newsyn;
9994   ValNodePtr  nextsyn;
9995   CharPtr     ptr;
9996   CharPtr     str;
9997   CharPtr     tmp;
9998 
9999   str = (CharPtr) headsyn->data.ptrvalue;
10000   if (StringHasNoText (str)) return TRUE;
10001   if (StringStr (str, "; ") == NULL) return FALSE;
10002 
10003   nextsyn = headsyn->next;
10004   lastsyn = headsyn;
10005   tmp = StringSave ((CharPtr) headsyn->data.ptrvalue);
10006   str = tmp;
10007 
10008   while (! StringHasNoText (str)) {
10009     ptr = StringStr (str, "; ");
10010     if (ptr != NULL) {
10011       ptr++;
10012       *ptr = '\0';
10013       ptr++;
10014     }
10015     TrimSpacesAroundString (str);
10016     newsyn = ValNodeNew (NULL);
10017     if (newsyn != NULL) {
10018       newsyn->data.ptrvalue = StringSave (str);
10019       newsyn->next = nextsyn;
10020       lastsyn->next = newsyn;
10021       lastsyn = newsyn;
10022     }
10023     str = ptr;
10024   }
10025 
10026   MemFree (tmp);
10027   return TRUE;
10028 }
10029 
ExpandGeneSynList(GeneRefPtr grp)10030 static void ExpandGeneSynList (GeneRefPtr grp)
10031 
10032 {
10033   ValNodePtr       currsyn;
10034   ValNodePtr       nextsyn;
10035   ValNodePtr PNTR  prevsyn;
10036 
10037   if (grp == NULL || grp->syn == NULL) return;
10038 
10039   currsyn = grp->syn;
10040   prevsyn = &(grp->syn);
10041   while (currsyn != NULL) {
10042     if (ExpandGeneSynCom (currsyn)) {
10043       nextsyn = currsyn->next;
10044       *(prevsyn) = currsyn->next;
10045       currsyn->next = NULL;
10046       ValNodeFreeData (currsyn);
10047     } else {
10048       nextsyn = currsyn->next;
10049       prevsyn = (ValNodePtr PNTR) &(currsyn->next);
10050     }
10051     currsyn = nextsyn;
10052   }
10053 
10054   currsyn = grp->syn;
10055   prevsyn = &(grp->syn);
10056   while (currsyn != NULL) {
10057     if (ExpandGeneSynSem (currsyn)) {
10058       nextsyn = currsyn->next;
10059       *(prevsyn) = currsyn->next;
10060       currsyn->next = NULL;
10061       ValNodeFreeData (currsyn);
10062     } else {
10063       nextsyn = currsyn->next;
10064       prevsyn = (ValNodePtr PNTR) &(currsyn->next);
10065     }
10066     currsyn = nextsyn;
10067   }
10068 }
10069 
/*
 * Sort key for one GO term entry, filled in by SortGoTerms.  The CharPtr
 * members are borrowed pointers into the entry's own sub-fields, not
 * copies; only goid is stored by value.
 */
typedef struct gosstruc {
  CharPtr       term;       /* value of the "text string" sub-field */
  Char          goid [32];  /* "go id" sub-field, string or integer printed as decimal */
  CharPtr       evidence;   /* value of the "evidence" sub-field, or NULL */
  Int4          pmid;       /* value of the "pubmed id" sub-field, 0 if absent */
  CharPtr       goref;      /* value of the "go ref" sub-field, or NULL */
  UserFieldPtr  ufp;        /* the entry these values were read from */
} GosStruc, PNTR GosStrucPtr;
10078 
SortVnpByGssp(VoidPtr ptr1,VoidPtr ptr2)10079 static int LIBCALLBACK SortVnpByGssp (VoidPtr ptr1, VoidPtr ptr2)
10080 
10081 {
10082   int           compare;
10083   GosStrucPtr   gsp1, gsp2;
10084   ValNodePtr    vnp1, vnp2;
10085 
10086   if (ptr1 == NULL || ptr2 == NULL) return 0;
10087   vnp1 = *((ValNodePtr PNTR) ptr1);
10088   vnp2 = *((ValNodePtr PNTR) ptr2);
10089   if (vnp1 == NULL || vnp2 == NULL) return 0;
10090   gsp1 = (GosStrucPtr) vnp1->data.ptrvalue;
10091   gsp2 = (GosStrucPtr) vnp2->data.ptrvalue;
10092   if (gsp1 == NULL || gsp2 == NULL) return 0;
10093 
10094   compare = StringICmp (gsp1->goid, gsp2->goid);
10095   if (compare > 0) {
10096     return 1;
10097   } else if (compare < 0) {
10098     return -1;
10099   }
10100 
10101   compare = StringICmp (gsp1->term, gsp2->term);
10102   if (compare > 0) {
10103     return 1;
10104   } else if (compare < 0) {
10105     return -1;
10106   }
10107 
10108   compare = StringICmp (gsp1->evidence, gsp2->evidence);
10109   if (compare > 0) {
10110     return 1;
10111   } else if (compare < 0) {
10112     return -1;
10113   }
10114 
10115   if (gsp1->pmid == 0) return 1;
10116   if (gsp2->pmid == 0) return -1;
10117   if (gsp1->pmid > gsp2->pmid) {
10118     return 1;
10119   } else if (gsp1->pmid < gsp2->pmid) {
10120     return -1;
10121   }
10122 
10123   return 0;
10124 }
10125 
/*
 * NULL-terminated list of field labels that mark a GO term category
 * inside a "GeneOntology" user object (matched case-insensitively by
 * SortGoTermsUfp and CleanupGoTermsUfp).  Index 0 is the empty string.
 */
static CharPtr bsecGoQualType [] = {
  "", "Process", "Component", "Function", NULL
};
10129 
/*
 * NULL-terminated list of sub-field labels of a GO term entry.  The index
 * of each label is used directly as a switch case in SortGoTerms and
 * CleanupGoTerms (1 text string, 2 go id, 3 pubmed id, 4 go ref,
 * 5 evidence), so the order must not change.
 */
static CharPtr bsecGoFieldType [] = {
  "", "text string", "go id", "pubmed id", "go ref", "evidence", NULL
};
10133 
/*
 * SortGoTerms
 *
 * Sorts a chain of GO term entries (each a choice-11 user field) with
 * SortVnpByGssp and removes adjacent duplicates.  An entry is a duplicate
 * of the one before it in sorted order when its term OR its goid matches
 * (case-insensitive) AND its pmid, goref and evidence all match.
 *
 * Returns the head of the re-linked chain.  When no entry yields a sort
 * key the original chain is returned untouched.
 *
 * NOTE(review): only entries that have a non-empty "text string" sub-field
 * are placed on the sort list, and collection stops at the first entry
 * whose choice is not 11.  Entries that were not collected are not
 * re-linked into the returned chain - confirm that this is intended.
 *
 * Duplicate entries are freed only after the whole list has been scanned,
 * because the sort keys hold pointers into the entries and each key is
 * still compared against its successor.  Freeing inside the loop, as the
 * code did before, read freed memory on the following iteration.
 */
static UserFieldPtr SortGoTerms (
  UserFieldPtr entryhead
)

{
  UserFieldPtr  deadhead = NULL, deadufp;
  UserFieldPtr  entry, topufp, ufp, lastufp;
  CharPtr       evidence, goid, goref, textstr;
  Char          gid [32];
  GosStrucPtr   gsp, lastgsp;
  ValNodePtr    head = NULL, vnp;
  Int2          j;
  ObjectIdPtr   oip;
  Int4          pmid;

  if (entryhead == NULL) return entryhead;

  /* pass 1: build one sort key per usable entry */
  for (entry = entryhead; entry != NULL; entry = entry->next) {
    if (entry->choice != 11) break;
    topufp = (UserFieldPtr)  entry->data.ptrvalue;
    if (topufp == NULL) continue;

    textstr = NULL;
    evidence = NULL;
    goid = NULL;
    goref = NULL;
    pmid = 0;
    for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
      oip = ufp->label;
      if (oip == NULL) continue;
      for (j = 0; bsecGoFieldType [j] != NULL; j++) {
        if (StringICmp (oip->str, bsecGoFieldType [j]) == 0) break;
      }
      if (bsecGoFieldType [j] == NULL) continue;
      switch (j) {
        case 1 :
          if (ufp->choice == 1) {
            textstr = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        case 2 :
          if (ufp->choice == 1) {
            goid = (CharPtr) ufp->data.ptrvalue;
          } else if (ufp->choice == 2) {
            /* integer go id: compare by its decimal text */
            sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
            goid = (CharPtr) gid;
          }
          break;
        case 3 :
          if (ufp->choice == 2) {
            pmid = (Int4) ufp->data.intvalue;
          }
          break;
        case 4 :
          if (ufp->choice == 1) {
            goref = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        case 5 :
          if (ufp->choice == 1) {
            evidence = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        default :
          break;
      }
    }

    if (StringDoesHaveText (textstr)) {
      gsp = (GosStrucPtr) MemNew (sizeof (GosStruc));
      if (gsp != NULL) {
        gsp->term = textstr;
        /* goid is copied because gid is reused for every entry */
        StringNCpy_0 (gsp->goid, goid, sizeof (gsp->goid));
        gsp->evidence = evidence;
        gsp->pmid = pmid;
        gsp->goref = goref;
        gsp->ufp = entry;
        ValNodeAddPointer (&head, 0, (Pointer) gsp);
      }
    }
  }

  if (head == NULL) return entryhead;
  head = ValNodeSort (head, SortVnpByGssp);

  /* pass 2: re-link in sorted order, setting duplicates aside */
  entryhead = NULL;
  lastgsp = NULL;
  lastufp = NULL;
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    gsp = (GosStrucPtr) vnp->data.ptrvalue;
    if (gsp == NULL || gsp->ufp == NULL) continue;
    if (lastgsp != NULL &&
        (StringICmp (gsp->term, lastgsp->term) == 0 || StringICmp (gsp->goid, lastgsp->goid) == 0) &&
         (gsp->pmid == lastgsp->pmid &&
          StringICmp (gsp->goref, lastgsp->goref) == 0 &&
          StringICmp (gsp->evidence, lastgsp->evidence) == 0)) {
      /* duplicate: park it; its strings are still needed via lastgsp */
      gsp->ufp->next = deadhead;
      deadhead = gsp->ufp;
    } else {
      if (lastufp != NULL) {
        lastufp->next = gsp->ufp;
      } else {
        entryhead = gsp->ufp;
      }
      lastufp = gsp->ufp;
      lastufp->next = NULL;
    }
    lastgsp = gsp;
  }

  /* all comparisons are done; now the duplicates can be released */
  while (deadhead != NULL) {
    deadufp = deadhead;
    deadhead = deadufp->next;
    deadufp->next = NULL;
    UserFieldFree (deadufp);
  }

  ValNodeFreeData (head);

  return entryhead;
}
10247 
/*
 * SortGoTermsUfp
 *
 * User-field visitor.  When ufp is a choice-11 field whose label matches
 * (case-insensitively) an entry of bsecGoQualType and whose first child is
 * itself choice 11, the child chain is replaced by the result of
 * SortGoTerms.  userdata is not used.
 */
static void SortGoTermsUfp (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  UserFieldPtr  firstentry;
  Int2          idx;
  ObjectIdPtr   label;
  Boolean       recognized = FALSE;

  if (ufp == NULL) return;
  if (ufp->choice != 11) return;

  label = ufp->label;
  if (label == NULL) return;

  idx = 0;
  while (bsecGoQualType [idx] != NULL) {
    if (StringICmp (label->str, bsecGoQualType [idx]) == 0) {
      recognized = TRUE;
      break;
    }
    idx++;
  }
  if (! recognized) return;

  firstentry = ufp->data.ptrvalue;
  if (firstentry != NULL && firstentry->choice == 11) {
    ufp->data.ptrvalue = SortGoTerms (firstentry);
  }
}
10271 
/*
 * SortGoTermsSfp
 *
 * User-object visitor.  For an object whose type string is exactly
 * "GeneOntology", visits its fields with SortGoTermsUfp, passing userdata
 * through.  Any other object (or NULL) is ignored.
 */
static void SortGoTermsSfp (
  UserObjectPtr uop,
  Pointer userdata
)

{
  if (uop == NULL || uop->type == NULL) return;
  if (StringCmp (uop->type->str, "GeneOntology") != 0) return;

  VisitUserFieldsInUop (uop, userdata, SortGoTermsUfp);
}
10287 
CleanupGoTerms(UserFieldPtr entryhead)10288 static void CleanupGoTerms (
10289   UserFieldPtr entryhead
10290 )
10291 
10292 {
10293   UserFieldPtr  entry, topufp, ufp;
10294   CharPtr       goid, goref, str;
10295   Int2          j;
10296   ObjectIdPtr   oip;
10297 
10298   if (entryhead == NULL) return;
10299 
10300   for (entry = entryhead; entry != NULL; entry = entry->next) {
10301     if (entry == NULL || entry->choice != 11) break;
10302     topufp = (UserFieldPtr)  entry->data.ptrvalue;
10303     if (topufp == NULL) continue;
10304 
10305     goid = NULL;
10306     goref = NULL;
10307     for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
10308       oip = ufp->label;
10309       if (oip == NULL) continue;
10310       for (j = 0; bsecGoFieldType [j] != NULL; j++) {
10311         if (StringICmp (oip->str, bsecGoFieldType [j]) == 0) break;
10312       }
10313       if (bsecGoFieldType [j] == NULL) continue;
10314       switch (j) {
10315         case 2 :
10316           if (ufp->choice == 1) {
10317             goid = (CharPtr) ufp->data.ptrvalue;
10318             if (goid != NULL && *goid != '\0') {
10319               if (StringNICmp (goid, "GO:", 3) == 0) {
10320                 str = StringSave (goid + 3);
10321                 ufp->data.ptrvalue = (Pointer) str;
10322                 MemFree (goid);
10323               }
10324             }
10325           }
10326           break;
10327         case 4 :
10328           if (ufp->choice == 1) {
10329             goref = (CharPtr) ufp->data.ptrvalue;
10330             if (goref != NULL && *goref != '\0') {
10331               if (StringNICmp (goref, "GO_REF:", 7) == 0) {
10332                 str = StringSave (goref + 7);
10333                 ufp->data.ptrvalue = (Pointer) str;
10334                 MemFree (goref);
10335               }
10336             }
10337           }
10338           break;
10339         default :
10340           break;
10341       }
10342     }
10343   }
10344 }
10345 
/*
 * CleanupGoTermsUfp
 *
 * User-field visitor.  When ufp is a choice-11 field whose label matches
 * (case-insensitively) an entry of bsecGoQualType and whose first child is
 * itself choice 11, runs CleanupGoTerms on the child chain.  userdata is
 * not used.
 */
static void CleanupGoTermsUfp (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  UserFieldPtr  firstentry;
  ObjectIdPtr   label;
  CharPtr PNTR  qual;

  if (ufp == NULL || ufp->choice != 11) return;

  label = ufp->label;
  if (label == NULL) return;

  /* stop on the first matching category name, or on the NULL sentinel */
  for (qual = bsecGoQualType; *qual != NULL; qual++) {
    if (StringICmp (label->str, *qual) == 0) break;
  }
  if (*qual == NULL) return;

  firstentry = ufp->data.ptrvalue;
  if (firstentry == NULL) return;
  if (firstentry->choice != 11) return;

  CleanupGoTerms (firstentry);
}
10369 
/*
 * CleanupGoTermsSfp
 *
 * User-object visitor.  For an object whose type string is exactly
 * "GeneOntology", visits its fields with CleanupGoTermsUfp, passing
 * userdata through.  Any other object (or NULL) is ignored.
 */
static void CleanupGoTermsSfp (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  kind;

  if (uop == NULL) return;

  kind = uop->type;
  if (kind == NULL || StringCmp (kind->str, "GeneOntology") != 0) return;

  VisitUserFieldsInUop (uop, userdata, CleanupGoTermsUfp);
}
10385 
/*
 * CleanUpSgml
 *
 * Converts str with Sgml2Ascii into a local 256-byte buffer and returns a
 * newly allocated, space-trimmed and space-compressed copy of the result,
 * with the first '<' and the first '>' each replaced by a space.
 *
 * Returns NULL (meaning "no change") when str has no text, contains no
 * '&', would not fit in the buffer, converts to nothing, or converts to
 * itself.  The caller owns the returned string.
 */
static CharPtr CleanUpSgml (
  CharPtr str
)

{
  CharPtr  bracket;
  Int2     needed;
  Char     plain [256];

  if (StringHasNoText (str) || StringChr (str, '&') == NULL) return NULL;

  needed = Sgml2AsciiLen (str);
  if (needed + 2 >= sizeof (plain)) return NULL;

  plain [0] = '\0';
  Sgml2Ascii (str, plain, needed + 1);
  if (StringHasNoText (plain) || StringCmp (str, plain) == 0) return NULL;

  /* only the first occurrence of each bracket is blanked */
  bracket = StringChr (plain, '<');
  if (bracket != NULL) {
    *bracket = ' ';
  }
  bracket = StringChr (plain, '>');
  if (bracket != NULL) {
    *bracket = ' ';
  }

  TrimSpacesAroundString (plain);
  Asn2gnbkCompressSpaces (plain);

  return StringSave (plain);
}
10419 
10420 /* special exception for genome pipeline rRNA names */
10421 
NotExceptedRibosomalName(CharPtr name)10422 static Boolean NotExceptedRibosomalName (
10423   CharPtr name
10424 )
10425 
10426 {
10427   Char     ch;
10428   CharPtr  str;
10429 
10430   str = StringStr (name, " ribosomal");
10431   if (str == NULL) return FALSE;
10432 
10433   str += 10;
10434   ch = *str;
10435   while (ch != '\0') {
10436     if (ch == ' ' || IS_DIGIT (ch)) {
10437       /* okay */
10438     } else {
10439       return TRUE;
10440     }
10441     str++;
10442     ch = *str;
10443   }
10444 
10445   return FALSE;
10446 }
10447 
10448 //LCOV_EXCL_START
CleanupSubSourceOrgModOtherFeat(SeqFeatPtr sfp,Pointer userdata)10449 NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (
10450   SeqFeatPtr sfp,
10451   Pointer userdata
10452 )
10453 
10454 {
10455   BioSourcePtr  biop;
10456   OrgNamePtr    onp = NULL;
10457   OrgRefPtr     orp;
10458 
10459   if (sfp == NULL) return;
10460   if (sfp->data.choice != SEQFEAT_BIOSRC) return;
10461   biop = (BioSourcePtr) sfp->data.value.ptrvalue;
10462   if (biop == NULL) return;
10463   orp = biop->org;
10464   if (orp != NULL) {
10465     onp = orp->orgname;
10466     if (orp != NULL) {
10467       CleanupOrgModOther (biop, onp);
10468     }
10469   }
10470   CleanupSubSourceOther (biop, onp);
10471 }
10472 
CleanupSubSourceOrgModOtherDesc(SeqDescrPtr sdp,Pointer userdata)10473 NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (
10474   SeqDescrPtr sdp,
10475   Pointer userdata
10476 )
10477 
10478 {
10479   BioSourcePtr  biop;
10480   OrgNamePtr    onp = NULL;
10481   OrgRefPtr     orp;
10482 
10483   if (sdp == NULL) return;
10484   if (sdp->choice != Seq_descr_source) return;
10485   biop = (BioSourcePtr) sdp->data.ptrvalue;
10486   if (biop == NULL) return;
10487   orp = biop->org;
10488   if (orp != NULL) {
10489     onp = orp->orgname;
10490     if (orp != NULL) {
10491       CleanupOrgModOther (biop, onp);
10492     }
10493   }
10494   CleanupSubSourceOther (biop, onp);
10495 }
10496 //LCOV_EXCL_STOP
10497 
10498 
/* One entity-to-text mapping used by BSECDecodeXml. */
typedef struct xmltable {
  CharPtr  code;    /* entity text without the trailing ';', e.g. "&amp" */
  size_t   len;     /* number of characters of code that are compared */
  CharPtr  letter;  /* replacement text; may be empty to delete the entity */
} XmlTable, PNTR XmlTablePtr;
10504 
/*
 * Entities recognised by BSECDecodeXml and their plain-text replacements.
 * The table ends at the entry whose code is NULL.  Entries are tried in
 * order and the first one whose code matches (and is followed by ';',
 * a space, or end of string) wins.
 */
static XmlTable xmlunicodes [] = {
  { "&amp",     4, "&"},
  { "&apos",    5, "\'"},
  { "&gt",      3, ">"},
  { "&lt",      3, "<"},
  { "&quot",    5, "\""},
  { "&#13&#10", 8, ""},
  { "&#916",    5, "Delta"},
  { "&#945",    5, "alpha"},
  { "&#946",    5, "beta"},
  { "&#947",    5, "gamma"},
  { "&#952",    5, "theta"},
  { "&#955",    5, "lambda"},
  { "&#956",    5, "mu"},
  { "&#957",    5, "nu"},
  { "&#8201",   6, " "},
  { "&#8206",   6, ""},
  { "&#8242",   6, "'"},
  { "&#8594",   6, "->"},
  { "&#8722",   6, "-"},
  { "&#8710",   6, "delta"},
  { "&#64257",  7, "fi"},
  { "&#64258",  7, "fl"},
  { "&#65292",  7, ","},
  { NULL,       0, ""}
};
10531 
/*
 * BSECDecodeXml
 *
 * Decodes, in place, the entities listed in xmlunicodes.  An entity is
 * recognised (case-insensitively) when it is followed by ';', by a space,
 * or by the end of the string; a following ';' is consumed, a following
 * space is kept.  Any other '&' is copied through unchanged.
 *
 * Returns str itself (unchanged when it has no text).
 *
 * Because the result is written over the input, a replacement is applied
 * only when it fits in the part of the buffer that has already been read.
 * Without that check a replacement longer than its entity (for example
 * "&#955" without ';' becoming "lambda") overwrote unread input and could
 * run past the terminating NUL.  In that case the entity is left as is.
 */
static CharPtr BSECDecodeXml (
  CharPtr str
)

{
  Char         ch, nxt;
  CharPtr      dst, ptr, src;
  Int2         i;
  size_t       len;
  XmlTablePtr  xtp;

  if (StringHasNoText (str)) return str;

  src = str;
  dst = str;
  ch = *src;
  while (ch != '\0') {
    if (ch == '&') {
      xtp = NULL;
      len = 1;
      for (i = 0; xmlunicodes [i].code != NULL; i++) {
        if (StringNICmp (src, xmlunicodes [i].code, xmlunicodes [i].len) == 0) {
          nxt = *(src +xmlunicodes [i].len);
          if (nxt == ';') {
            xtp = &(xmlunicodes [i]);
            len = xtp->len + 1;
            break;
          } else if (nxt == ' ' || nxt == '\0') {
            xtp = &(xmlunicodes [i]);
            len = xtp->len;
            break;
          }
        }
      }
      /* dst never leads src, so (src - dst) + len is the room available */
      if (xtp != NULL && StringLen (xtp->letter) > (size_t) (src - dst) + len) {
        xtp = NULL;
      }
      if (xtp != NULL) {
        if (StringLen (xtp->letter) > 0) {
          ptr = xtp->letter;
          ch = *ptr;
          while (ch != '\0') {
            *dst = ch;
            dst++;
            ptr++;
            ch = *ptr;
          }
        }
        src += len;
      } else {
        /* unknown or non-fitting entity: keep the '&' literally */
        *dst = '&';
        dst++;
        src++;
      }
    } else {
      *dst = ch;
      dst++;
      src++;
    }
    ch = *src;
  }
  *dst = '\0';

  return str;
}
10594 
CleanupFeatureStrings(SeqFeatPtr sfp,Boolean isJscan,Boolean isEmblOrDdbj,Boolean stripSerial,Boolean modernizeFeats,ValNodePtr PNTR publist)10595 static void CleanupFeatureStrings (
10596   SeqFeatPtr sfp,
10597   Boolean isJscan,
10598   Boolean isEmblOrDdbj,
10599   Boolean stripSerial,
10600   Boolean modernizeFeats,
10601   ValNodePtr PNTR publist
10602 )
10603 
10604 {
10605   Uint1                aa;
10606   BioSourcePtr         biop;
10607   Char                 ch;
10608   Uint1                codon [6];
10609   GeneNomenclaturePtr  gnp;
10610   GeneRefPtr           grp;
10611   ImpFeatPtr           ifp;
10612   Boolean              is_fMet = FALSE;
10613   Boolean              is_iMet = FALSE;
10614   Int2                 j;
10615   Boolean              justTrnaText;
10616   size_t               len;
10617   CharPtr              name;
10618   ObjectIdPtr          oip;
10619   OrgNamePtr           onp = NULL;
10620   OrgRefPtr            orp;
10621   PubdescPtr           pdp;
10622   ProtRefPtr           prp;
10623   CharPtr              ptr;
10624   RNAGenPtr            rgp;
10625   RNAQualPtr           rqp;
10626   RnaRefPtr            rrp;
10627   SubSourcePtr         ssp;
10628   CharPtr              str;
10629   CharPtr              suff;
10630   CharPtr              temp;
10631   Char                 tmp [64];
10632   Boolean              trimming_junk;
10633   tRNAPtr              trp;
10634   UserFieldPtr         ufp;
10635   UserObjectPtr        uop;
10636   CharPtr              val;
10637   ValNodePtr           vnp, vnp2;
10638   SeqFeatXrefPtr       xref;
10639 
10640   if (sfp == NULL) return;
10641   BSECDecodeXml (sfp->comment);
10642   CleanVisStringAndCompress (&(sfp->comment));
10643   len = StringLen (sfp->comment);
10644   if (len > 4) {
10645     if (StringCmp (sfp->comment + len - 3, ",..") == 0 ||
10646         StringCmp (sfp->comment + len - 3, ".,.") == 0 ||
10647         StringCmp (sfp->comment + len - 3, "..,") == 0 ||
10648         StringCmp (sfp->comment + len - 3, ",.,") == 0) {
10649       sfp->comment [len - 3] = '.';
10650       sfp->comment [len - 2] = '.';
10651       sfp->comment [len - 1] = '.';
10652     }
10653   }
10654   BSECDecodeXml (sfp->title);
10655   CleanVisString (&(sfp->title));
10656   CleanVisString (&(sfp->except_text));
10657   if (StringDoesHaveText (sfp->except_text)) {
10658     CleanUpExceptText (sfp);
10659   }
10660   CleanDoubleQuote (sfp->comment);
10661   if (StringCmp (sfp->comment, ".") == 0) {
10662     sfp->comment = MemFree (sfp->comment);
10663   }
10664   /*
10665   if (sfp->ext != NULL) {
10666     VisitUserObjectsInUop (sfp->ext, NULL, SortGoTermsSfp);
10667   }
10668   */
10669   if (sfp->ext != NULL) {
10670     VisitUserObjectsInUop (sfp->ext, NULL, CleanupGoTermsSfp);
10671   }
10672 
10673   for (xref = sfp->xref; xref != NULL; xref = xref->next) {
10674     if (xref->data.choice != SEQFEAT_PROT) continue;
10675     prp = (ProtRefPtr) xref->data.value.ptrvalue;
10676     if (prp == NULL) continue;
10677     RemoveFlankingQuotes (&(prp->desc));
10678     RemoveFlankingQuotesList (&(prp->name));
10679     CleanVisStringAndCompress (&(prp->desc));
10680     CleanVisStringListAndCompress (&(prp->name));
10681   }
10682 
10683   switch (sfp->data.choice) {
10684     case SEQFEAT_BOND :
10685     case SEQFEAT_PSEC_STR :
10686     case SEQFEAT_COMMENT:
10687       return;
10688     case SEQFEAT_SITE :
10689       for (j = 0; bsecSiteList [j] != NULL; j++) {
10690         StringNCpy_0 (tmp, bsecSiteList [j], sizeof (tmp));
10691         len = StringLen (tmp);
10692         if (StringNICmp (sfp->comment, tmp, len) == 0) {
10693           if (sfp->data.value.intvalue == 0 || sfp->data.value.intvalue == 255) {
10694             sfp->data.value.intvalue = j;
10695             if (StringHasNoText (sfp->comment + len) || StringICmp (sfp->comment + len, " site") == 0) {
10696               sfp->comment = MemFree (sfp->comment);
10697             }
10698           }
10699         } else {
10700           val = tmp;
10701           ch = *val;
10702           while (ch != '\0') {
10703             if (ch == '-') {
10704               *val = ' ';
10705             }
10706             val++;
10707             ch = *val;
10708           }
10709           if (StringNICmp (sfp->comment, tmp, len) == 0) {
10710             if (sfp->data.value.intvalue == 0 || sfp->data.value.intvalue == 255) {
10711               sfp->data.value.intvalue = j;
10712               if (StringHasNoText (sfp->comment + len) || StringICmp (sfp->comment + len, " site") == 0) {
10713                 sfp->comment = MemFree (sfp->comment);
10714               }
10715             }
10716           }
10717         }
10718       }
10719       break;
10720     default :
10721       break;
10722   }
10723   if (sfp->data.value.ptrvalue == NULL) return;
10724 
10725   biop = NULL;
10726   orp = NULL;
10727   switch (sfp->data.choice) {
10728     case SEQFEAT_ORG :
10729       orp = (OrgRefPtr) sfp->data.value.ptrvalue;
10730       break;
10731     case SEQFEAT_BIOSRC :
10732       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
10733       if (biop != NULL) {
10734         orp = biop->org;
10735       }
10736     default :
10737       break;
10738   }
10739   if (orp != NULL && sfp->qual != NULL) {
10740     GbqualToOrpMod (&(sfp->qual), &(orp->mod));
10741   }
10742 
10743   biop = NULL;
10744   orp = NULL;
10745   switch (sfp->data.choice) {
10746     case SEQFEAT_GENE :
10747       grp = (GeneRefPtr) sfp->data.value.ptrvalue;
10748       if (sfp->xref != NULL) {
10749         HandleXrefOnGene (sfp);
10750       }
10751       BSECDecodeXml (grp->locus);
10752       CleanVisStringAndCompress (&(grp->locus));
10753       /*
10754       if (isJscan && StringDoesHaveText (grp->locus)) {
10755         ptr = CleanUpSgml (grp->locus);
10756         if (ptr != NULL) {
10757           grp->locus = MemFree (grp->locus);
10758           grp->locus = StringSave (ptr);
10759         }
10760       }
10761       */
10762       CleanVisString (&(grp->allele));
10763       CleanVisStringAndCompress (&(grp->desc));
10764       CleanVisString (&(grp->maploc));
10765       CleanVisString (&(grp->locus_tag));
10766       ExpandGeneSynList (grp);
10767       /*
10768       if (isJscan && grp->syn != NULL) {
10769         for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
10770           str = (CharPtr) vnp->data.ptrvalue;
10771           if (StringHasNoText (str)) continue;
10772           ptr = CleanUpSgml (str);
10773           if (ptr != NULL) {
10774             vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
10775             vnp->data.ptrvalue = StringSave (ptr);
10776           }
10777         }
10778       }
10779       */
10780       for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
10781         str = (CharPtr) vnp->data.ptrvalue;
10782         if (StringHasNoText (str)) continue;
10783         BSECDecodeXml (str);
10784       }
10785       CleanVisStringListCaseSensitive (&(grp->syn));
10786       grp->syn = ValNodeSort (grp->syn, SortVnpByStringCS);
10787       grp->syn = UniqueStringValNodeCS (grp->syn);
10788       grp->syn = ValNodeSort (grp->syn, SortVnpByStringCILCFirst);
10789       CleanDoubleQuote (grp->locus);
10790       CleanDoubleQuote (grp->allele);
10791       CleanDoubleQuote (grp->desc);
10792       /*
10793       if (isJscan && StringDoesHaveText (grp->desc)) {
10794         ptr = CleanUpSgml (grp->desc);
10795         if (ptr != NULL) {
10796           grp->desc = MemFree (grp->desc);
10797           grp->desc = StringSave (ptr);
10798         }
10799       }
10800       */
10801       CleanDoubleQuote (grp->maploc);
10802       CleanDoubleQuote (grp->locus_tag);
10803       CleanDoubleQuoteList (grp->syn);
10804       FixOldDbxrefs (grp->db, isEmblOrDdbj);
10805       FixNumericDbxrefs (grp->db);
10806       grp->db = ValNodeSort (grp->db, SortDbxref);
10807       CleanupDuplicateDbxrefs (&(grp->db));
10808       CleanupObsoleteDbxrefs (&(grp->db));
10809       CleanupGoDbxrefs (grp->db);
10810       /* now move grp->dbxref to sfp->dbxref */
10811       vnp = grp->db;
10812       grp->db = NULL;
10813       ValNodeLink ((&sfp->dbxref), vnp);
10814       if (grp->locus != NULL && grp->syn != NULL) {
10815         for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
10816           str = (CharPtr) vnp->data.ptrvalue;
10817           if (StringCmp (grp->locus, str) == 0) {
10818             vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
10819           }
10820         }
10821         CleanVisStringListCaseSensitive (&(grp->syn));
10822       }
10823       gnp = grp->formal_name;
10824       if (gnp != NULL) {
10825         FixOldDbxref (gnp->source);
10826         FixNumericDbxref (gnp->source);
10827       }
10828       /*
10829       if (grp->locus != NULL && sfp->comment != NULL) {
10830         if (StringCmp (grp->locus, sfp->comment) == 0) {
10831           sfp->comment = MemFree (sfp->comment);
10832         }
10833       }
10834       */
10835       break;
10836     case SEQFEAT_ORG :
10837       orp = (OrgRefPtr) sfp->data.value.ptrvalue;
10838       break;
10839     case SEQFEAT_CDREGION :
10840       if (sfp->xref != NULL && sfp->product != NULL) {
10841         HandleXrefOnCDS (sfp);
10842       }
10843       break;
10844     case SEQFEAT_PROT :
10845       prp = (ProtRefPtr) sfp->data.value.ptrvalue;
10846       for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
10847         str = (CharPtr) vnp->data.ptrvalue;
10848         if (StringHasNoText (str)) continue;
10849         CleanupECNumber (str);
10850         if (ECNumberCanBeSplit (str)) {
10851           ptr = str;
10852           ch = *ptr;
10853           while (ch != '\0' && ch != ' ' && ch != ';') {
10854             ptr++;
10855             ch = *ptr;
10856           }
10857           if (ch != '\0') {
10858             *ptr = '\0';
10859             ptr++;
10860             vnp2 = ValNodeCopyStr (NULL, 0, ptr);
10861             if (vnp2 != NULL) {
10862               vnp2->next = vnp->next;
10863               vnp->next = vnp2;
10864             }
10865           }
10866         }
10867       }
10868       for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
10869         str = (CharPtr) vnp->data.ptrvalue;
10870         if (StringHasNoText (str)) continue;
10871         BSECDecodeXml (str);
10872       }
10873       BSECDecodeXml (prp->desc);
10874       CleanVisStringAndCompress (&(prp->desc));
10875       CleanVisStringJunkListAndCompress (&(prp->name));
10876       CleanVisStringList (&(prp->ec));
10877       CleanVisStringJunkListAndCompress (&(prp->activity));
10878       CleanDoubleQuote (prp->desc);
10879       CleanDoubleQuoteList (prp->name);
10880       CleanDoubleQuoteList (prp->ec);
10881       CleanDoubleQuoteList (prp->activity);
10882       RemoveFlankingQuotes (&(prp->desc));
10883       RemoveFlankingQuotesList (&(prp->name));
10884       FixOldDbxrefs (prp->db, isEmblOrDdbj);
10885       FixNumericDbxrefs (prp->db);
10886       prp->db = ValNodeSort (prp->db, SortDbxref);
10887       CleanupDuplicateDbxrefs (&(prp->db));
10888       CleanupObsoleteDbxrefs (&(prp->db));
10889       CleanupGoDbxrefs (prp->db);
10890       /* now move prp->dbxref to sfp->dbxref */
10891       vnp = prp->db;
10892       prp->db = NULL;
10893       ValNodeLink ((&sfp->dbxref), vnp);
10894       if (prp->processed != 3 && prp->processed != 4 && prp->processed != 5 &&
10895           prp->name == NULL && sfp->comment != NULL) {
10896         if (StringICmp (sfp->comment, "putative") != 0) {
10897           ValNodeAddStr (&(prp->name), 0, sfp->comment);
10898           sfp->comment = NULL;
10899         }
10900       }
10901       if (prp->processed == 3 || prp->processed == 4 || prp->processed == 5) {
10902         if (prp->name != NULL) {
10903           str = (CharPtr) prp->name->data.ptrvalue;
10904           if ((StringStr (str, "putative") != NULL ||
10905                StringStr (str, "put. ") != NULL) &&
10906               sfp->comment == NULL) {
10907             sfp->comment = StringSave ("putative");
10908           }
10909           if (! InformativeString (str)) {
10910             prp->name = ValNodeFreeData (prp->name);
10911           }
10912         }
10913       }
10914       if ((prp->processed == 1 || prp->processed == 2) && prp->name == NULL) {
10915         ValNodeCopyStr (&(prp->name), 0, "unnamed");
10916       }
10917       for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
10918         str = (CharPtr) vnp->data.ptrvalue;
10919         if (StringICmp (str, "RbcL") == 0 || StringICmp (str, "rubisco large subunit") == 0) {
10920           vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
10921           MemFree (str);
10922         } else if (StringICmp (str, "RbcS") == 0 || StringICmp (str, "rubisco small subunit") == 0) {
10923           vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit");
10924           MemFree (str);
10925         }
10926       }
10927       /*
10928       if (StringDoesHaveText (prp->desc)) {
10929         for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
10930           str = (CharPtr) vnp->data.ptrvalue;
10931           if (StringHasNoText (str)) continue;
10932           if (StringCmp (prp->desc, str) == 0) {
10933             prp->desc = MemFree (prp->desc);
10934           }
10935         }
10936       }
10937       */
10938       break;
10939     case SEQFEAT_RNA :
10940       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
10941       if (rrp->ext.choice == 1) {
10942         BSECDecodeXml ((CharPtr) rrp->ext.value.ptrvalue);
10943         str = (CharPtr) rrp->ext.value.ptrvalue;
10944         CleanVisStringAndCompress ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
10945         CleanDoubleQuote ((CharPtr) rrp->ext.value.ptrvalue);
10946         RemoveFlankingQuotes ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
10947         if (rrp->ext.value.ptrvalue == NULL) {
10948           rrp->ext.choice = 0;
10949         } else if (rrp->type == 4) {
10950           name = (CharPtr) rrp->ext.value.ptrvalue;
10951           len = StringLen (name);
10952           if (len > 5) {
10953             if (len > 16 && StringNICmp (name + len - 16, " ribosomal RNA .", 14) == 0) {
10954               name [len-2] = '\0';
10955               len = StringLen (name);
10956             }
10957             if (len > 14 && StringNICmp (name + len - 14, " ribosomal rRNA", 14) == 0) {
10958             } else if (StringNICmp (name + len - 5, " rRNA", 5) == 0) {
10959               str = MemNew (len + 10);
10960               if (str != NULL) {
10961                 StringNCpy (str, name, len - 5);
10962                 StringCat (str, " ribosomal RNA");
10963                 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
10964                 rrp->ext.value.ptrvalue = (Pointer) str;
10965               }
10966             } else if (StringNICmp (name + len - 5, "_rRNA", 5) == 0) {
10967               str = MemNew (len + 10);
10968               if (str != NULL) {
10969                 StringNCpy (str, name, len - 5);
10970                 StringCat (str, " ribosomal RNA");
10971                 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
10972                 rrp->ext.value.ptrvalue = (Pointer) str;
10973               }
10974             }
10975           }
10976         } else if (rrp->type == 3) {
10977           name = (CharPtr) rrp->ext.value.ptrvalue;
10978           aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
10979           if (aa != 0) {
10980             is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
10981             is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
10982             rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
10983             trp = (tRNAPtr) MemNew (sizeof (tRNA));
10984             if (trp != NULL) {
10985               trp->aatype = 2;
10986               for (j = 0; j < 6; j++) {
10987                 trp->codon [j] = 255;
10988               }
10989               if (justTrnaText) {
10990                 for (j = 0; j < 6; j++) {
10991                   trp->codon [j] = codon [j];
10992                 }
10993               }
10994               trp->aa = aa;
10995               rrp->ext.choice = 2;
10996               rrp->ext.value.ptrvalue = (Pointer) trp;
10997               CleanupTrna (sfp, trp);
10998             }
10999             if (is_fMet) {
11000               if (sfp->comment == NULL) {
11001                 sfp->comment = StringSave ("fMet");
11002               } else {
11003                 len = StringLen (sfp->comment) + StringLen ("fMet") + 5;
11004                 str = MemNew (sizeof (Char) * len);
11005                 StringCpy (str, sfp->comment);
11006                 StringCat (str, "; ");
11007                 StringCat (str, "fMet");
11008                 sfp->comment = MemFree (sfp->comment);
11009                 sfp->comment = str;
11010               }
11011             }
11012             if (is_iMet) {
11013               if (sfp->comment == NULL) {
11014                 sfp->comment = StringSave ("iMet");
11015               } else {
11016                 len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
11017                 str = MemNew (sizeof (Char) * len);
11018                 StringCpy (str, sfp->comment);
11019                 StringCat (str, "; ");
11020                 StringCat (str, "iMet");
11021                 sfp->comment = MemFree (sfp->comment);
11022                 sfp->comment = str;
11023               }
11024             }
11025           }
11026         }
11027       } else if (rrp->ext.choice == 2) {
11028         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
11029         CleanupTrna (sfp, trp);
11030       } else if (rrp->type == 3 && (! StringHasNoText (sfp->comment))) {
11031         aa = ParseTRnaString (sfp->comment, &justTrnaText, codon, TRUE);
11032         if (aa != 0) {
11033           trp = (tRNAPtr) MemNew (sizeof (tRNA));
11034           if (trp != NULL) {
11035             trp->aatype = 2;
11036             for (j = 0; j < 6; j++) {
11037               trp->codon [j] = 255;
11038             }
11039             if (justTrnaText) {
11040               for (j = 0; j < 6; j++) {
11041                 trp->codon [j] = codon [j];
11042               }
11043             }
11044             trp->aa = aa;
11045             rrp->ext.choice = 2;
11046             rrp->ext.value.ptrvalue = (Pointer) trp;
11047             if (justTrnaText) {
11048               if (StringCmp (sfp->comment, "tRNA-fMet") != 0 &&
11049                   StringCmp (sfp->comment, "fMet") != 0 &&
11050                   StringCmp (sfp->comment, "fMet tRNA") != 0 &&
11051                   StringCmp (sfp->comment, "fMet-tRNA") != 0) {
11052                 sfp->comment = MemFree (sfp->comment);
11053               } else {
11054                 sfp->comment = MemFree (sfp->comment);
11055                 sfp->comment = StringSave ("fMet");
11056               }
11057               if (StringCmp (sfp->comment, "tRNA-iMet") != 0 &&
11058                   StringCmp (sfp->comment, "iMet") != 0 &&
11059                   StringCmp (sfp->comment, "iMet tRNA") != 0 &&
11060                   StringCmp (sfp->comment, "iMet-tRNA") != 0) {
11061                 sfp->comment = MemFree (sfp->comment);
11062               } else {
11063                 sfp->comment = MemFree (sfp->comment);
11064                 sfp->comment = StringSave ("iMet");
11065               }
11066             }
11067           }
11068         }
11069       }
11070       if (rrp->ext.choice == 3) {
11071         rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
11072         if (rgp != NULL) {
11073           str = (CharPtr) rgp->product;
11074           CleanVisStringAndCompress (&(rgp->product));
11075           CleanDoubleQuote (rgp->product);
11076           RemoveFlankingQuotes (&(rgp->product));
11077           if (StringICmp (rgp->product, "internal transcribed spacer 1 (ITS1)") == 0) {
11078             rgp->product = MemFree (rgp->product);
11079             rgp->product = StringSave ("internal transcribed spacer 1");
11080           } else if (StringICmp (rgp->product, "internal transcribed spacer 2 (ITS2)") == 0) {
11081             rgp->product = MemFree (rgp->product);
11082             rgp->product = StringSave ("internal transcribed spacer 2");
11083           } else if (StringICmp (rgp->product, "internal transcribed spacer 3 (ITS3)") == 0) {
11084             rgp->product = MemFree (rgp->product);
11085             rgp->product = StringSave ("internal transcribed spacer 3");
11086           }
11087           CleanVisStringAndCompress (&(rgp->_class));
11088           CleanDoubleQuote (rgp->_class);
11089           for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
11090             CleanVisStringAndCompress (&(rqp->qual));
11091             CleanDoubleQuote (rqp->qual);
11092             CleanVisStringAndCompress (&(rqp->val));
11093             CleanDoubleQuote (rqp->val);
11094           }
11095         }
11096       }
11097       if (rrp->ext.choice == 0 && sfp->comment != NULL && rrp->type == 4) {
11098         len = StringLen (sfp->comment);
11099         if (len > 15 && len < 20) {
11100           if (StringNICmp (sfp->comment + len - 15, "S ribosomal RNA", 15) == 0) {
11101             rrp->ext.choice = 1;
11102             rrp->ext.value.ptrvalue = sfp->comment;
11103             sfp->comment = NULL;
11104           }
11105         } else if (len > 6 && len < 20) {
11106           if (StringNICmp (sfp->comment + len - 6, "S rRNA", 6) == 0) {
11107             rrp->ext.choice = 1;
11108             rrp->ext.value.ptrvalue = sfp->comment;
11109             sfp->comment = NULL;
11110           }
11111         }
11112       }
11113 /*
11114  * This section has been commented out based on a request by DeAnne Cravaritis.
11115  * If left in, this causes unexpected results when RNA comments are copied to
11116  * the product name or vice versa.
11117       if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
11118         if (StringICmp ((CharPtr) rrp->ext.value.ptrvalue, sfp->comment) == 0) {
11119           sfp->comment = MemFree (sfp->comment);
11120         }
11121       }
11122 */
11123       if (rrp->type == 4 && rrp->ext.choice == 1 ) {
11124         name = (CharPtr) rrp->ext.value.ptrvalue;
11125         len = StringLen (name);
11126         if (len > 5 && NotExceptedRibosomalName (name)) {
11127           suff = NULL;
11128           str = StringStr (name, " ribosomal");
11129           if (str != NULL) {
11130             suff = str + 10;
11131             ch = *suff;
11132             if (ch != '\0' && ch != ' ') {
11133               suff = NULL;
11134               str = NULL;
11135             }
11136           }
11137           if (str == NULL) {
11138             str = StringStr (name, " rRNA");
11139             if (str != NULL) {
11140               suff = str + 5;
11141               ch = *suff;
11142               if (ch != '\0' && ch != ' ') {
11143                 suff = NULL;
11144                 str = NULL;
11145               }
11146             }
11147           }
11148           if (suff != NULL && StringNICmp (suff, " RNA", 4) == 0) {
11149             suff += 4;
11150           }
11151           if (suff != NULL && StringNICmp (suff, " DNA", 4) == 0) {
11152             suff += 4;
11153           }
11154           if (suff != NULL && StringNICmp (suff, " ribosomal", 10) == 0) {
11155             suff += 10;
11156           }
11157           TrimSpacesAroundString (suff);
11158           if (str != NULL) {
11159             *str = '\0';
11160             len = StringLen (name);
11161             if (StringHasNoText (suff)) {
11162               suff = NULL;
11163             }
11164             if (suff != NULL) {
11165               len += StringLen (suff) + 2;
11166             }
11167             str = MemNew (len + 15);
11168             if (str != NULL) {
11169               StringCpy (str, name);
11170               StringCat (str, " ribosomal RNA");
11171               if (suff != NULL) {
11172                 ch = *suff;
11173                 if (ch != ',' && ch != ';') {
11174                   StringCat (str, " ");
11175                 }
11176                 StringCat (str, suff);
11177               }
11178               rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11179               rrp->ext.value.ptrvalue = (Pointer) str;
11180             }
11181           }
11182         }
11183         name = (CharPtr) rrp->ext.value.ptrvalue;
11184         len = StringLen (name);
11185         if (len > 5) {
11186           ch = *name;
11187           while (ch != '\0' && (ch == '.' || (IS_DIGIT (ch)))) {
11188             name++;
11189             ch = *name;
11190           }
11191           /*
11192           if (ch == 's' && StringCmp (name, "s ribosomal RNA") == 0) {
11193             *name = 'S';
11194           }
11195           */
11196           if (ch == 's' && name [1] == ' ') {
11197             *name = 'S';
11198           }
11199         }
11200         StrStripSpaces ((CharPtr) rrp->ext.value.ptrvalue);
11201         name = (CharPtr) rrp->ext.value.ptrvalue;
11202         len = StringLen (name);
11203         if (len > 17) {
11204           if (StringNICmp (name + len - 17, "ribosomal RNA RNA", 17) == 0) {
11205             *(name + len - 4) = '\0';
11206           }
11207         }
11208         trimming_junk = TRUE;
11209         while (trimming_junk) {
11210           StrStripSpaces ((CharPtr) rrp->ext.value.ptrvalue);
11211           name = (CharPtr) rrp->ext.value.ptrvalue;
11212           ptr = StringStr (name, "ribosomal ribosomal");
11213           if (ptr != NULL) {
11214             suff = ptr + 19;
11215             *(ptr + 10) = '\0';
11216             temp = MemNew (StringLen (name) + StringLen (suff) + 2);
11217             TrimSpacesAroundString (suff);
11218             StringCpy (temp, name);
11219             if (suff [0] != ' ' && suff [0] != '\0') {
11220               StringCat (temp, " ");
11221             }
11222             StringCat (temp, suff);
11223             rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11224             rrp->ext.value.ptrvalue = (Pointer) temp;
11225           } else {
11226             ptr = StringStr (name, "RNA RNA");
11227             if (ptr != NULL) {
11228               suff = ptr + 7;
11229               *(ptr + 4) = '\0';
11230               temp = MemNew (StringLen (name) + StringLen (suff) + 2);
11231               TrimSpacesAroundString (suff);
11232               StringCpy (temp, name);
11233               if (suff [0] != ' ' && suff [0] != '\0') {
11234                 StringCat (temp, " ");
11235               }
11236               StringCat (temp, suff);
11237               rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11238               rrp->ext.value.ptrvalue = (Pointer) temp;
11239             } else {
11240               ptr = StringStr (name, "ribosomal RNA ribosomal");
11241               if (ptr != NULL) {
11242                 suff = ptr + 23;
11243                 *(ptr + 14) = '\0';
11244                 temp = MemNew (StringLen (name) + StringLen (suff) + 2);
11245                 TrimSpacesAroundString (suff);
11246                 StringCpy (temp, name);
11247                 if (suff [0] != ' ' && suff [0] != '\0') {
11248                   StringCat (temp, " ");
11249                 }
11250                 StringCat (temp, suff);
11251                 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11252                 rrp->ext.value.ptrvalue = (Pointer) temp;
11253               } else {
11254                 ptr = StringStr (name, "ribosomal rRNA");
11255                 if (ptr != NULL) {
11256                   suff = ptr + 14;
11257                   *(ptr + 10) = '\0';
11258                   temp = MemNew (StringLen (name) + StringLen (" RNA") + StringLen (suff) + 2);
11259                   TrimSpacesAroundString (suff);
11260                   StringCpy (temp, name);
11261                   StringCat (temp, " RNA");
11262                   if (suff [0] != ' ' && suff [0] != '\0') {
11263                     StringCat (temp, " ");
11264                   }
11265                   StringCat (temp, suff);
11266                   rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11267                   rrp->ext.value.ptrvalue = (Pointer) temp;
11268                 } else {
11269                   ptr = StringStr (name, "RNA rRNA");
11270                   if (ptr != NULL) {
11271                     suff = ptr + 8;
11272                     *(ptr + 3) = '\0';
11273                     temp = MemNew (StringLen (name) + StringLen (suff) + 2);
11274                     TrimSpacesAroundString (suff);
11275                     StringCpy (temp, name);
11276                     if (suff [0] != ' ' && suff [0] != '\0') {
11277                       StringCat (temp, " ");
11278                     }
11279                     StringCat (temp, suff);
11280                     rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11281                     rrp->ext.value.ptrvalue = (Pointer) temp;
11282                   } else {
11283                     trimming_junk = FALSE;
11284                   }
11285                 }
11286               }
11287             }
11288           }
11289         }
11290         TrimSpacesAroundString ((CharPtr) rrp->ext.value.ptrvalue);
11291         /*
11292         name = (CharPtr) rrp->ext.value.ptrvalue;
11293         if (StringICmp (name, "16S rRNA. Bacterial SSU") == 0) {
11294           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11295           rrp->ext.value.ptrvalue = StringSave ("16S ribosomal RNA");
11296         } else if (StringICmp (name, "23S rRNA. Bacterial LSU") == 0) {
11297           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11298           rrp->ext.value.ptrvalue = StringSave ("23S ribosomal RNA");
11299         } else if (StringICmp (name, "5S rRNA. Bacterial TSU") == 0) {
11300           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11301           rrp->ext.value.ptrvalue = StringSave ("5S ribosomal RNA");
11302         } else if (StringICmp (name, "Large Subunit Ribosomal RNA; lsuRNA; 23S ribosomal RNA") == 0) {
11303           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11304           rrp->ext.value.ptrvalue = StringSave ("23S ribosomal RNA");
11305         } else if (StringICmp (name, "Small Subunit Ribosomal RNA; ssuRNA; 16S ribosomal RNA") == 0) {
11306           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11307           rrp->ext.value.ptrvalue = StringSave ("16S ribosomal RNA");
11308         } else if (StringICmp (name, "Small Subunit Ribosomal RNA; ssuRNA; SSU ribosomal RNA") == 0) {
11309           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11310           rrp->ext.value.ptrvalue = StringSave ("small subunit ribosomal RNA");
11311         } else if (StringICmp (name, "Large Subunit Ribosomal RNA; lsuRNA; LSU ribosomal RNA") == 0) {
11312           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
11313           rrp->ext.value.ptrvalue = StringSave ("large subunit ribosomal RNA");
11314         }
11315         */
11316       }
11317       /*
11318       if (rrp->type == 2 && rrp->ext.choice == 0 && sfp->comment != NULL) {
11319         rrp->ext.choice = 1;
11320         rrp->ext.value.ptrvalue = sfp->comment;
11321         sfp->comment = NULL;
11322       }
11323       */
11324       if (rrp->type == 2 && rrp->ext.choice == 0 && sfp->comment != NULL) {
11325         len = StringLen (sfp->comment);
11326         if (len > 5) {
11327           if (StringNICmp (sfp->comment + len - 4, " RNA", 4) == 0 ||
11328               StringNICmp (sfp->comment + len - 5, " mRNA", 5) == 0) {
11329             rrp->ext.choice = 1;
11330             rrp->ext.value.ptrvalue = sfp->comment;
11331             sfp->comment = NULL;
11332           }
11333         }
11334       }
11335       if (rrp->type == 255 || rrp->type == 10) {
11336         name = GetRNARefProductString (rrp, NULL);
11337         if (StringICmp (name, "its1") == 0 || StringICmp (name, "its 1") == 0) {
11338           SetRNARefProductString (rrp, NULL, "internal transcribed spacer 1", ExistingTextOption_replace_old);
11339         } else if (StringICmp (name, "its2") == 0 || StringICmp (name, "its 2") == 0) {
11340           SetRNARefProductString (rrp, NULL, "internal transcribed spacer 2", ExistingTextOption_replace_old);
11341         } else if (StringICmp (name, "its3") == 0 || StringICmp (name, "its 3") == 0) {
11342           SetRNARefProductString (rrp, NULL, "internal transcribed spacer 3", ExistingTextOption_replace_old);
11343         }
11344         name = MemFree (name);
11345       }
11346       if ((rrp->type == 255 || rrp->type == 10) && rrp->ext.choice == 0 && sfp->comment != NULL) {
11347         if (StringICmp (sfp->comment, "internal transcribed spacer 1") == 0 ||
11348             StringICmp (sfp->comment, "internal transcribed spacer 2") == 0 ||
11349             StringICmp (sfp->comment, "internal transcribed spacer 3") == 0) {
11350           rrp->ext.choice = 1;
11351           rrp->ext.value.ptrvalue = sfp->comment;
11352           sfp->comment = NULL;
11353         } else if (StringICmp (sfp->comment, "internal transcribed spacer 1 (ITS1)") == 0 ||
11354             StringICmp (sfp->comment, "internal transcribed spacer 2 (ITS2)") == 0 ||
11355             StringICmp (sfp->comment, "internal transcribed spacer 3 (ITS3)") == 0) {
11356           ptr = StringStr (sfp->comment, " (");
11357           if (ptr != NULL) {
11358             *ptr = '\0';
11359           }
11360           rrp->ext.choice = 1;
11361           rrp->ext.value.ptrvalue = sfp->comment;
11362           sfp->comment = NULL;
11363         } else if (StringICmp (sfp->comment, "ITS1") == 0 || StringICmp (sfp->comment, "ITS 1") == 0) {
11364           rrp->ext.choice = 1;
11365           rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 1");
11366           sfp->comment = MemFree (sfp->comment);
11367         } else if (StringICmp (sfp->comment, "ITS2") == 0 || StringICmp (sfp->comment, "ITS 2") == 0) {
11368           rrp->ext.choice = 1;
11369           rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 2");
11370           sfp->comment = MemFree (sfp->comment);
11371         } else if (StringICmp (sfp->comment, "ITS3") == 0 || StringICmp (sfp->comment, "ITS 3") == 0) {
11372           rrp->ext.choice = 1;
11373           rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 3");
11374           sfp->comment = MemFree (sfp->comment);
11375         }
11376       }
11377       break;
11378     case SEQFEAT_PUB :
11379       pdp = (PubdescPtr) sfp->data.value.ptrvalue;
11380       CleanDoubleQuote (pdp->comment);
11381       NormalizePubdesc (pdp, stripSerial, TRUE, publist);
11382       break;
11383     case SEQFEAT_SEQ :
11384       break;
11385     case SEQFEAT_IMP :
11386       ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
11387       CleanVisString (&(ifp->key));
11388       CleanVisString (&(ifp->loc));
11389       CleanVisString (&(ifp->descr));
11390       break;
11391     case SEQFEAT_REGION :
11392       CleanVisStringAndCompress ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
11393       CleanDoubleQuote ((CharPtr) sfp->data.value.ptrvalue);
11394       if (sfp->data.value.ptrvalue == NULL) {
11395         sfp->data.choice = SEQFEAT_COMMENT;
11396       } else {
11397         if (sfp->ext != NULL) {
11398           uop = FindUopByTag (sfp->ext, "cddScoreData");
11399           if (uop != NULL) {
11400             for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
11401               if (ufp->choice != 1) continue;
11402               oip = ufp->label;
11403               if (oip == NULL) continue;
11404               if (StringICmp (oip->str, "definition") == 0) {
11405                 CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
11406                 CleanDoubleQuote ((CharPtr) ufp->data.ptrvalue);
11407               }
11408             }
11409           }
11410         }
11411       }
11412       break;
11413     case SEQFEAT_COMMENT :
11414       break;
11415     case SEQFEAT_BOND :
11416       break;
11417     case SEQFEAT_SITE :
11418       break;
11419     case SEQFEAT_RSITE :
11420       break;
11421     case SEQFEAT_USER :
11422       VisitAllUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject);
11423       break;
11424     case SEQFEAT_TXINIT :
11425       break;
11426     case SEQFEAT_NUM :
11427       break;
11428     case SEQFEAT_PSEC_STR :
11429       break;
11430     case SEQFEAT_NON_STD_RESIDUE :
11431       break;
11432     case SEQFEAT_HET :
11433       break;
11434     case SEQFEAT_BIOSRC :
11435       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
11436       if (biop != NULL) {
11437         if (biop->genome == GENOME_virion) {
11438           biop->genome = GENOME_unknown;
11439         }
11440         orp = biop->org;
11441         if (orp != NULL) {
11442           CleanVisStringListAndCompress (&(orp->mod));
11443           OrpModToSubSource (&(orp->mod), &(biop->subtype));
11444           onp = orp->orgname;
11445           if (onp != NULL) {
11446             CleanupOrgModOther (biop, onp);
11447           }
11448         }
11449         biop->subtype = SortSubSourceList (biop->subtype);
11450         CleanSubSourceList (&(biop->subtype), biop->genome);
11451         CleanupSubSourceOther (biop, onp);
11452         biop->subtype = SortSubSourceList (biop->subtype);
11453         if (modernizeFeats) {
11454           ModernizePCRPrimers (biop);
11455         }
11456         CleanupPCRReactionSet (&(biop->pcr_primers));
11457         if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
11458           for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
11459             if (ssp->subtype == SUBSRC_plasmid_name) {
11460               biop->genome = GENOME_plasmid;
11461             }
11462           }
11463         }
11464       }
11465       break;
11466     default :
11467       break;
11468   }
11469   if (orp != NULL) {
11470     CleanVisStringAndCompress (&(orp->taxname));
11471     CleanVisStringAndCompress (&(orp->common));
11472     CleanVisStringList (&(orp->mod));
11473     CleanVisStringList (&(orp->syn));
11474     FixOldDbxrefs (orp->db, isEmblOrDdbj);
11475     FixNumericDbxrefs (orp->db);
11476     orp->db = ValNodeSort (orp->db, SortDbxref);
11477     orp->syn = ValNodeSort (orp->syn, SortVnpByString);
11478     orp->syn = UniqueValNode (orp->syn);
11479     CleanupDuplicateDbxrefs (&(orp->db));
11480     CleanupObsoleteDbxrefs (&(orp->db));
11481     CleanupGoDbxrefs (orp->db);
11482     onp = orp->orgname;
11483     while (onp != NULL) {
11484       CleanVisString (&(onp->attrib));
11485       CleanVisString (&(onp->lineage));
11486       CleanVisString (&(onp->div));
11487       OrpModToOrgMod (&(orp->mod), &(onp->mod));
11488       onp->mod = SortOrgModList (onp->mod);
11489       CleanOrgModListEx (&(onp->mod), orp->common);
11490       onp->mod = SortOrgModList (onp->mod);
11491       onp = onp->next;
11492     }
11493   }
11494 }
11495 
/*
 * Splits every string in a ValNode list at semicolons.  Each time a ';' is
 * found in a node's string, the string is cut at that point and the text
 * after the semicolon is copied into a new node inserted right after the
 * current one.  The new node is visited next, so later semicolons in the
 * same original string are split in turn.
 *
 * Returns *head, or NULL when head or *head is NULL.
 */
static ValNodePtr SplitStringsAtSemicolon (ValNodePtr PNTR head)

{
  ValNodePtr  extra;
  ValNodePtr  node;
  CharPtr     semi;

  if (head == NULL || *head == NULL) return NULL;

  for (node = *head; node != NULL; node = node->next) {
    semi = StringChr ((CharPtr) node->data.ptrvalue, ';');
    if (semi == NULL) continue;

    /* terminate the current string at the semicolon */
    *semi = '\0';

    /* remainder goes into a freshly allocated node placed after this one */
    extra = ValNodeCopyStr (NULL, 0, semi + 1);
    if (extra == NULL) continue;
    extra->next = node->next;
    node->next = extra;
  }

  return *head;
}
11522 
11523 
/*
 * Cleans the string content of a single sequence descriptor in place.
 *
 *   sdp             descriptor to clean (ignored when NULL or without data)
 *   stripSerial     passed through to NormalizePubdesc for pub descriptors
 *   modernizeFeats  when set, ModernizePCRPrimers is run on source descriptors
 *   publist         passed through to NormalizePubdesc
 *   isEmblOrDdbj    selects case-sensitive keyword cleanup for GenBank blocks
 *                   and is passed to FixOldDbxrefs
 *
 * Descriptor choices not listed in the switch below are left untouched.
 * When the descriptor carries an OrgRef (org or source descriptors) the
 * OrgRef and its OrgName chain are cleaned after the switch.
 */
static void CleanupDescriptorStrings (
  ValNodePtr sdp,
  Boolean stripSerial,
  Boolean modernizeFeats,
  ValNodePtr PNTR publist,
  Boolean isEmblOrDdbj
)

{
  BioSourcePtr  biop = NULL;
  EMBLBlockPtr  ebp;
  GBBlockPtr    gbp;
  CharPtr       kw;
  OrgNamePtr    onp = NULL;
  OrgRefPtr     orp = NULL;
  PubdescPtr    pdp;
  PirBlockPtr   pir;
  PrfBlockPtr   prf;
  SPBlockPtr    sp;
  SubSourcePtr  ssp;
  ValNodePtr    vnp;

  if (sdp == NULL) return;

  /* these two choices are skipped before data.ptrvalue is inspected */
  if (sdp->choice == Seq_descr_mol_type || sdp->choice == Seq_descr_method) return;

  if (sdp->data.ptrvalue == NULL) return;

  switch (sdp->choice) {

    case Seq_descr_name :
    case Seq_descr_region :
      CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
      /* keep an empty string rather than a NULL payload */
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;

    case Seq_descr_title :
      BSECDecodeXml ((CharPtr) sdp->data.ptrvalue);
      CleanVisStringAndCompress ((CharPtr PNTR) &sdp->data.ptrvalue);
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;

    case Seq_descr_comment :
      BSECDecodeXml ((CharPtr) sdp->data.ptrvalue);
      CleanVisStringJunk ((CharPtr PNTR) &sdp->data.ptrvalue);
      RemoveSpacesBetweenTildes ((CharPtr) sdp->data.ptrvalue);
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;

    case Seq_descr_org :
      /* cleaned by the shared OrgRef section after the switch */
      orp = (OrgRefPtr) sdp->data.ptrvalue;
      break;

    case Seq_descr_pir :
      pir = (PirBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(pir->keywords));
      break;

    case Seq_descr_sp :
      sp = (SPBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(sp->keywords));
      break;

    case Seq_descr_prf :
      prf = (PrfBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(prf->keywords));
      break;

    case Seq_descr_genbank :
      gbp = (GBBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(gbp->keywords));

      /* rewrite the legacy TPA keyword spellings as "TPA:assembly" */
      for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
        kw = (CharPtr) vnp->data.ptrvalue;
        if (StringICmp (kw, "TPA:reassembly") == 0 ||
            StringICmp (kw, "TPA_reassembly") == 0 ||
            StringICmp (kw, "TPA_assembly") == 0) {
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          vnp->data.ptrvalue = StringSave ("TPA:assembly");
        }
      }

      CleanVisStringList (&(gbp->extra_accessions));
      gbp->extra_accessions = ValNodeSort (gbp->extra_accessions, SortVnpByString);
      gbp->extra_accessions = UniqueValNode (gbp->extra_accessions);

      if (isEmblOrDdbj) {
        CleanVisStringListCaseSensitive (&(gbp->keywords));
      } else {
        CleanVisStringList (&(gbp->keywords));
      }

      /* a source or origin consisting of just "." is dropped */
      CleanVisStringJunk (&(gbp->source));
      if (StringCmp (gbp->source, ".") == 0) {
        gbp->source = MemFree (gbp->source);
      }
      CleanVisStringJunk (&(gbp->origin));
      if (StringCmp (gbp->origin, ".") == 0) {
        gbp->origin = MemFree (gbp->origin);
      }

      CleanVisString (&(gbp->date));
      CleanVisString (&(gbp->div));
      CleanVisString (&(gbp->taxonomy));
      break;

    case Seq_descr_pub :
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      CleanDoubleQuote (pdp->comment);
      NormalizePubdesc (pdp, stripSerial, TRUE, publist);
      break;

    case Seq_descr_user :
      VisitAllUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject);
      break;

    case Seq_descr_embl :
      ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
      CleanVisStringList (&(ebp->extra_acc));
      ebp->extra_acc = ValNodeSort (ebp->extra_acc, SortVnpByString);
      SplitStringsAtSemicolon (&(ebp->keywords));
      CleanVisStringListCaseSensitive (&(ebp->keywords));
      break;

    case Seq_descr_source :
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop == NULL) break;

      if (biop->genome == GENOME_virion) {
        biop->genome = GENOME_unknown;
      }

      orp = biop->org;
      if (orp != NULL) {
        CleanVisStringList (&(orp->mod));
        OrpModToSubSource (&(orp->mod), &(biop->subtype));
        onp = orp->orgname;
        if (onp != NULL) {
          CleanupOrgModOther (biop, onp);
        }
      }

      /* onp is still NULL here when the source has no org or no orgname */
      biop->subtype = SortSubSourceList (biop->subtype);
      CleanSubSourceList (&(biop->subtype), biop->genome);
      CleanupSubSourceOther (biop, onp);
      biop->subtype = SortSubSourceList (biop->subtype);

      if (modernizeFeats) {
        ModernizePCRPrimers (biop);
      }
      CleanupPCRReactionSet (&(biop->pcr_primers));

      /* a plasmid-name subsource promotes unknown/genomic to plasmid */
      if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
        for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
          if (ssp->subtype == SUBSRC_plasmid_name) {
            biop->genome = GENOME_plasmid;
          }
        }
      }
      break;

    default :
      /* modif, num, maploc, dbxref, dates, pdb, het, molinfo: nothing to do */
      break;
  }

  if (orp == NULL) return;

  /* shared OrgRef cleanup for org and source descriptors */
  CleanVisStringAndCompress (&(orp->taxname));
  CleanVisStringAndCompress (&(orp->common));
  CleanVisStringList (&(orp->mod));
  CleanVisStringList (&(orp->syn));
  FixOldDbxrefs (orp->db, isEmblOrDdbj);
  FixNumericDbxrefs (orp->db);
  orp->db = ValNodeSort (orp->db, SortDbxref);
  orp->syn = ValNodeSort (orp->syn, SortVnpByString);
  orp->syn = UniqueValNode (orp->syn);
  CleanupDuplicateDbxrefs (&(orp->db));
  CleanupObsoleteDbxrefs (&(orp->db));
  CleanupGoDbxrefs (orp->db);

  /* walk the OrgName chain, moving OrgRef modifiers into OrgMods */
  for (onp = orp->orgname; onp != NULL; onp = onp->next) {
    CleanVisString (&(onp->attrib));
    CleanVisString (&(onp->lineage));
    CleanVisString (&(onp->div));
    OrpModToOrgMod (&(orp->mod), &(onp->mod));
    onp->mod = SortOrgModList (onp->mod);
    CleanOrgModListEx (&(onp->mod), orp->common);
    onp->mod = SortOrgModList (onp->mod);
  }
}
11736 
CheckForQual(GBQualPtr gbqual,CharPtr string_q,CharPtr string_v)11737 static Int2 CheckForQual (GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
11738 
11739 {
11740   GBQualPtr curq;
11741 
11742   for (curq = gbqual; curq; curq = curq->next) {
11743     if (StringCmp (string_q, curq->qual) == 0) {
11744       if (curq->val == NULL) {
11745         curq->val = StringSave (string_v);
11746         return 1;
11747       }
11748       if (StringCmp (string_v, curq->val) == 0) return 1;
11749     }
11750   }
11751   return 0;
11752 }
/*
 * Appends a qualifier (qual, val) to the end of a GBQual list unless
 * CheckForQual reports that it is already present.
 *
 * A "translation" qualifier with a NULL or empty value is never added.
 *
 * Returns the head of the list: the original head, or the new node when the
 * incoming list was empty.  If the new node cannot be allocated the list is
 * returned unchanged.
 */
static GBQualPtr AddGBQual (GBQualPtr gbqual, CharPtr qual, CharPtr val)

{
  GBQualPtr  curq;
  GBQualPtr  newq;

  if (StringCmp (qual, "translation") == 0) {
    if (val == NULL)  return gbqual;
    if (*val == '\0') return gbqual;
  }

  if (gbqual != NULL && CheckForQual (gbqual, qual, val) == 1) return gbqual;

  newq = GBQualNew ();
  /* allocation failed: leave the list as it was instead of dereferencing NULL */
  if (newq == NULL) return gbqual;

  newq->next = NULL;
  if (val != NULL) {
    newq->val = StringSave (val);
  }
  newq->qual = StringSave (qual);

  if (gbqual == NULL) return newq;

  /* walk to the last node and hang the new qualifier off it */
  for (curq = gbqual; curq->next != NULL; curq = curq->next) continue;
  curq->next = newq;

  return gbqual;
}
11779 
/*
 * Extracts the double-quoted text from a string that ends in ')' and adds
 * it to the feature as a "replace" qualifier.
 *
 * The value runs from just after the first '"' in p to the last '"' before
 * the trailing ')'.  The closing quote is temporarily overwritten with a
 * terminator while the value is copied and is restored afterwards, so p is
 * unchanged on return.
 *
 * Nothing is added when sfp is NULL, when p contains no quote, when p does
 * not end in ')', or when there is no closing quote distinct from the
 * opening one.
 */
static void AddReplaceQual (SeqFeatPtr sfp, CharPtr p)

{
  CharPtr s, val;

  if (sfp == NULL) return;

  val = StringChr (p, '\"');
  if (val == NULL) return;
  val++;

  s = p + StringLen (p) - 1;
  if (*s != ')') return;

  /* scan backwards from the character before ')' looking for the closing quote */
  for (s--; s > val && *s != '\"'; s--) continue;

  /*
   * s < val means the scan stepped back onto the opening quote (input
   * ending in "\")"), which must not be mistaken for a closing quote.
   */
  if (s < val || *s != '\"') return;

  *s = '\0';
  sfp->qual = (GBQualPtr) AddGBQual (sfp->qual, "replace", val);
  *s = '\"';
}
11796 
11797 //LCOV_EXCL_START
SerialNumberInString(CharPtr str)11798 NLM_EXTERN Boolean SerialNumberInString (CharPtr str)
11799 
11800 {
11801   Char     ch;
11802   Boolean  hasdigits;
11803   CharPtr  ptr;
11804   Boolean  suspicious = FALSE;
11805 
11806   if (str == NULL || StringHasNoText (str)) return FALSE;
11807   ptr = StringChr (str, '[');
11808 
11809   /* bail if first digit after bracket is 0 */
11810   if (ptr != NULL && ptr [1] == '0') return FALSE;
11811 
11812   while ((! suspicious) && ptr != NULL) {
11813     hasdigits = FALSE;
11814     ptr++;
11815     ch = *ptr;
11816     while (IS_DIGIT (ch)) {
11817       hasdigits = TRUE;
11818       ptr++;
11819       ch = *ptr;
11820     }
11821     if (ch == ']' && hasdigits) {
11822       suspicious = TRUE;
11823     }
11824     if (! suspicious) {
11825       ptr = StringChr (ptr, '[');
11826     }
11827   }
11828   return suspicious;
11829 }
11830 //LCOV_EXCL_STOP
11831 
11832 /* now only strips serials for local, general, refseq, and 2+6 genbank ids */
/*
 * Per-entry callback: clears the Boolean that mydata points to when the
 * Bioseq carries an ID type for which serial numbers should not be
 * stripped.  index and indent are not used.
 *
 * The flag is cleared for GIBBSQ, GIBBMT, EMBL, PIR, SWISSPROT, PATENT,
 * DDBJ, PRF, PDB, TPE, TPD and GPIPE ids, and for GENBANK / TPG ids whose
 * accession is exactly 6 characters long.  All other id types leave the
 * flag as the caller set it.
 */
static void CheckForSwissProtID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  SeqIdPtr      sip;
  BoolPtr       stripSerial;
  TextSeqIdPtr  tsip;

  if (sep == NULL || ! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  stripSerial = (BoolPtr) mydata;
  if (stripSerial == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GIBBSQ :
      case SEQID_GIBBMT :
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_PATENT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_PDB :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_GPIPE :
        *stripSerial = FALSE;
        break;
      case SEQID_GENBANK :
      case SEQID_TPG :
        /* only 6-character accessions block stripping */
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL && StringLen (tsip->accession) == 6) {
          *stripSerial = FALSE;
        }
        break;
      default :
        /* not-set, local, other, general and anything else: no change */
        break;
    }
  }
}
11885 
/*
 * Per-entry callback: sets the Boolean that mydata points to when the
 * Bioseq has an EMBL or DDBJ sequence id.  The flag is never cleared here.
 * index and indent are not used.
 */
static void CheckForEmblDdbjID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    isEmblOrDdbj;
  SeqIdPtr   sip;

  if (sep == NULL || ! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  isEmblOrDdbj = (BoolPtr) mydata;
  if (isEmblOrDdbj == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
      *isEmblOrDdbj = TRUE;
    }
  }
}
11911 
/*
 * Per-entry callback: sets the Boolean that mydata points to when the
 * Bioseq has a GIBBSQ, GIBBMT or GIIM sequence id.  The flag is never
 * cleared here.  index and indent are not used.
 */
static void CheckForJournalScanID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    isJScan;
  SeqIdPtr   sip;

  if (sep == NULL || ! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  isJScan = (BoolPtr) mydata;
  if (isJScan == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_GIBBSQ ||
        sip->choice == SEQID_GIBBMT ||
        sip->choice == SEQID_GIIM) {
      *isJScan = TRUE;
    }
  }
}
11938 
FixWrongFuzzOnPlusStrand(SeqLocPtr location)11939 NLM_EXTERN Boolean FixWrongFuzzOnPlusStrand (SeqLocPtr location)
11940 
11941 {
11942   SeqLocPtr   firstSlp;
11943   IntFuzzPtr  ifp;
11944   SeqLocPtr   lastSlp;
11945   Boolean     res = FALSE;
11946   SeqIntPtr   sip;
11947   SeqLocPtr   slp;
11948   SeqPntPtr   spp;
11949 
11950   if (location == NULL) return FALSE;
11951 
11952   firstSlp = NULL;
11953   lastSlp = NULL;
11954   slp = SeqLocFindNext (location, NULL);
11955   while (slp != NULL) {
11956     if (firstSlp == NULL) {
11957       firstSlp = slp;
11958     }
11959     lastSlp = slp;
11960     slp = SeqLocFindNext (location, slp);
11961   }
11962 
11963   if (firstSlp != NULL && firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
11964     sip = (SeqIntPtr) firstSlp->data.ptrvalue;
11965     if (sip != NULL && (sip->strand == Seq_strand_plus || sip->strand == Seq_strand_unknown)) {
11966       if (sip->if_to != NULL && sip->if_from == NULL) {
11967         sip->if_from = IntFuzzFree (sip->if_from);
11968         ifp = IntFuzzNew ();
11969         if (ifp != NULL) {
11970           ifp->choice = 4;
11971           sip->if_from = ifp;
11972           ifp->a = 2;
11973           res = TRUE;
11974         }
11975       }
11976     }
11977   }
11978 
11979   if (lastSlp != NULL && lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
11980     sip = (SeqIntPtr) lastSlp->data.ptrvalue;
11981     if (sip != NULL && (sip->strand == Seq_strand_plus || sip->strand == Seq_strand_unknown)) {
11982       if (sip->if_to == NULL && sip->if_from != NULL) {
11983         sip->if_to = IntFuzzFree (sip->if_to);
11984         ifp = IntFuzzNew ();
11985         if (ifp != NULL) {
11986           ifp->choice = 4;
11987           sip->if_to = ifp;
11988           ifp->a = 1;
11989           res = TRUE;
11990         }
11991       }
11992     }
11993   }
11994 
11995   return res;
11996 }
11997 
FixWrongFuzzOnMinusStrand(SeqLocPtr location)11998 NLM_EXTERN Boolean FixWrongFuzzOnMinusStrand (SeqLocPtr location)
11999 
12000 {
12001   SeqLocPtr   firstSlp;
12002   IntFuzzPtr  ifp;
12003   SeqLocPtr   lastSlp;
12004   Boolean     res = FALSE;
12005   SeqIntPtr   sip;
12006   SeqLocPtr   slp;
12007   SeqPntPtr   spp;
12008 
12009   if (location == NULL) return FALSE;
12010 
12011   firstSlp = NULL;
12012   lastSlp = NULL;
12013   slp = SeqLocFindNext (location, NULL);
12014   while (slp != NULL) {
12015     if (firstSlp == NULL) {
12016       firstSlp = slp;
12017     }
12018     lastSlp = slp;
12019     slp = SeqLocFindNext (location, slp);
12020   }
12021 
12022   if (firstSlp != NULL && firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
12023     sip = (SeqIntPtr) firstSlp->data.ptrvalue;
12024     if (sip != NULL && (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev)) {
12025       if (sip->if_to == NULL && sip->if_from != NULL) {
12026         sip->if_from = IntFuzzFree (sip->if_from);
12027         ifp = IntFuzzNew ();
12028         if (ifp != NULL) {
12029           ifp->choice = 4;
12030           sip->if_to = ifp;
12031           ifp->a = 1;
12032           res = TRUE;
12033         }
12034       }
12035     }
12036   }
12037 
12038   if (lastSlp != NULL && lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
12039     sip = (SeqIntPtr) lastSlp->data.ptrvalue;
12040     if (sip != NULL && (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev)) {
12041       if (sip->if_to != NULL && sip->if_from == NULL) {
12042         sip->if_to = IntFuzzFree (sip->if_to);
12043         ifp = IntFuzzNew ();
12044         if (ifp != NULL) {
12045           ifp->choice = 4;
12046           sip->if_from = ifp;
12047           ifp->a = 2;
12048           res = TRUE;
12049         }
12050       }
12051     }
12052   }
12053 
12054   return res;
12055 }
12056 
CleanUpSeqLoc(SeqLocPtr slp)12057 NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp)
12058 
12059 {
12060   BioseqPtr  bsp;
12061   SeqLocPtr  curr;
12062   SeqLocPtr  head;
12063   SeqLocPtr  last;
12064   SeqLocPtr  loc;
12065   SeqLocPtr  next;
12066   SeqIdPtr   sip;
12067   SeqIntPtr  sintp;
12068   SeqPntPtr  spp;
12069   Int4       swp;
12070   SeqLocPtr  tail;
12071 
12072   if (slp == NULL) return;
12073 
12074   if (slp->choice == SEQLOC_WHOLE) {
12075     sip = (SeqIdPtr) slp->data.ptrvalue;
12076     if (sip != NULL) {
12077       bsp = BioseqFind (sip);
12078       if (bsp != NULL) {
12079         sintp = SeqIntNew ();
12080         if (sintp != NULL) {
12081           sintp->from = 0;
12082           sintp->to = bsp->length - 1;
12083           sintp->id = sip; /* reuse existing slp->data.ptrvalue, no need to free */
12084           slp->choice = SEQLOC_INT;
12085           slp->data.ptrvalue = (Pointer) sintp;
12086         }
12087       }
12088     }
12089   }
12090 
12091   /* from < to for all intervals */
12092   loc = SeqLocFindNext (slp, NULL);
12093   while (loc != NULL) {
12094     if (loc->choice == SEQLOC_INT) {
12095       sintp = (SeqIntPtr) loc->data.ptrvalue;
12096       if (sintp != NULL) {
12097         if (sintp->from > sintp->to) {
12098           swp = sintp->from;
12099           sintp->from = sintp->to;
12100           sintp->to = swp;
12101         }
12102         if (sintp->strand == Seq_strand_both) {
12103           sintp->strand = Seq_strand_plus;
12104         } else if (sintp->strand == Seq_strand_both_rev) {
12105           sintp->strand = Seq_strand_minus;
12106         }
12107       }
12108     } else if (loc->choice == SEQLOC_PNT) {
12109       spp = (SeqPntPtr) loc->data.ptrvalue;
12110       if (spp != NULL) {
12111         if (spp->strand == Seq_strand_both) {
12112           spp->strand = Seq_strand_plus;
12113         } else if (spp->strand == Seq_strand_both_rev) {
12114           spp->strand = Seq_strand_minus;
12115         }
12116       }
12117     }
12118     loc = SeqLocFindNext (slp, loc);
12119   }
12120 
12121   if (slp->choice == SEQLOC_PACKED_INT) {
12122     loc = (SeqLocPtr) slp->data.ptrvalue;
12123     if (loc == NULL || loc->next != NULL) return;
12124     /* here seqloc_packed_int points to a single location element, so no need for seqloc_packed_int parent */
12125     slp->choice = loc->choice;
12126     slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
12127     MemFree (loc);
12128     return;
12129   }
12130 
12131   if (slp->choice != SEQLOC_MIX) return;
12132   loc = (SeqLocPtr) slp->data.ptrvalue;
12133   if (loc == NULL) return;
12134 
12135   if (loc->next != NULL) {
12136     /* check for null NULL at beginning */
12137     if (loc->choice == SEQLOC_NULL) {
12138       slp->data.ptrvalue = (Pointer) loc->next;
12139       loc->next = NULL;
12140       ValNodeFree (loc);
12141     }
12142     /* check for null NULL at end */
12143     loc = (SeqLocPtr) slp->data.ptrvalue;
12144     last = NULL;
12145     while (loc->next != NULL) {
12146       last = loc;
12147       loc = loc->next;
12148     }
12149     if (loc->choice == SEQLOC_NULL && last != NULL) {
12150       last->next = NULL;
12151       ValNodeFree (loc);
12152     }
12153   }
12154 
12155   loc = (SeqLocPtr) slp->data.ptrvalue;
12156   if (loc == NULL) return;
12157 
12158   if (loc->next == NULL) {
12159     /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */
12160     slp->choice = loc->choice;
12161     slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
12162     MemFree (loc);
12163     return;
12164   }
12165 
12166   /* check for nested seqloc_mix, remove nesting */
12167   curr = loc;
12168   last = NULL;
12169   while (curr != NULL) {
12170     next = curr->next;
12171     if (curr->choice == SEQLOC_MIX) {
12172       head = (SeqLocPtr) curr->data.ptrvalue;
12173       if (head != NULL) {
12174         tail = head;
12175         while (tail->next != NULL) {
12176           tail = tail->next;
12177         }
12178         if (last != NULL) {
12179           last->next = head;
12180         }
12181         tail->next = curr->next;
12182         curr->next = NULL;
12183         curr = MemFree (curr);
12184       }
12185     } else {
12186       last = curr;
12187     }
12188     curr = next;
12189   }
12190 
12191   NormalizeNullsBetween (slp);
12192 
12193   /*
12194   FixWrongFuzzOnPlusStrand (slp);
12195   FixWrongFuzzOnMinusStrand (slp);
12196   */
12197 }
12198 
12199 typedef struct cbloc {
12200   CodeBreakPtr  cbp;
12201   Int4          pos;
12202 } CbLoc, PNTR CbLocPtr;
12203 
SortByCodeBreakLoc(VoidPtr ptr1,VoidPtr ptr2)12204 static int LIBCALLBACK SortByCodeBreakLoc (VoidPtr ptr1, VoidPtr ptr2)
12205 
12206 {
12207   CbLocPtr  clp1;
12208   CbLocPtr  clp2;
12209 
12210   clp1 = (CbLocPtr) ptr1;
12211   clp2 = (CbLocPtr) ptr2;
12212   if (clp1 == NULL || clp2 == NULL) return 0;
12213   if (clp1->pos < clp2->pos) {
12214     return -1;
12215   } else if (clp1->pos > clp2->pos) {
12216     return 1;
12217   }
12218   return 0;
12219 }
12220 
/*
 * Reorders a feature's code-break list by the position each code-break maps
 * to on the feature's product Bioseq.
 *
 * The list is returned untouched when sfp or list is NULL, the product
 * Bioseq cannot be found, there are fewer than two code-breaks, the work
 * table cannot be allocated, or the positions are already non-decreasing.
 * Otherwise the nodes are relinked in sorted order and the new head is
 * returned.
 */
static CodeBreakPtr SortCodeBreaks (SeqFeatPtr sfp, CodeBreakPtr list)

{
  SeqLocPtr     aaloc;
  CodeBreakPtr  cbp;
  size_t        k;
  size_t        n;
  Boolean       needs_sort = FALSE;
  BioseqPtr     prot;
  CbLocPtr      table;

  if (sfp == NULL || list == NULL) return list;
  prot = BioseqFindFromSeqLoc (sfp->product);
  if (prot == NULL) return list;

  n = 0;
  for (cbp = list; cbp != NULL; cbp = cbp->next) {
    n++;
  }
  if (n < 2) return list;

  /* one extra slot; the relinking loop below reads table [n].cbp as the
     terminating next pointer (MemNew presumably zero-fills -- confirm) */
  table = (CbLocPtr) MemNew (sizeof (CbLoc) * (n + 1));
  if (table == NULL) return list;

  /* record each code-break with its position on the product */
  k = 0;
  cbp = list;
  while (cbp != NULL && k < n) {
    table [k].cbp = cbp;
    aaloc = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
    table [k].pos = GetOffsetInBioseq (aaloc, prot, SEQLOC_START) + 1;
    SeqLocFree (aaloc);
    cbp = cbp->next;
    k++;
  }

  /* only sort and relink when some neighbor pair is descending */
  for (k = 1; k < n; k++) {
    if (table [k].pos < table [k - 1].pos) {
      needs_sort = TRUE;
    }
  }

  if (needs_sort) {
    StableMergeSort (table, n, sizeof (CbLoc), SortByCodeBreakLoc);

    for (k = 0; k < n; k++) {
      table [k].cbp->next = table [k + 1].cbp;
    }

    list = table [0].cbp;
  }

  MemFree (table);

  return list;
}
12273 
/*
 * Removes code-breaks that repeat the previously kept one.  A node is
 * dropped (unlinked and freed) when its location compares equal to the last
 * kept node's location and both the aa choice and aa integer value match.
 *
 * prevcbp is the address of the list head pointer; it is updated if needed.
 */
static void CleanupDuplicatedCodeBreaks (CodeBreakPtr PNTR prevcbp)

{
  CodeBreakPtr  cbp;
  CodeBreakPtr  kept = NULL;
  CodeBreakPtr  nxt;

  if (prevcbp == NULL) return;

  for (cbp = *prevcbp; cbp != NULL; cbp = nxt) {
    nxt = cbp->next;

    if (kept != NULL &&
        SeqLocCompare (cbp->loc, kept->loc) == SLC_A_EQ_B &&
        cbp->aa.choice == kept->aa.choice &&
        cbp->aa.value.intvalue == kept->aa.value.intvalue) {
      /* duplicate of the last kept node: splice it out and free it */
      *prevcbp = nxt;
      cbp->next = NULL;
      CodeBreakFree (cbp);
      continue;
    }

    /* keep this node; the next link to patch is its own next field */
    kept = cbp;
    prevcbp = (CodeBreakPtr PNTR) &(cbp->next);
  }
}
12307 
12308 //LCOV_EXCL_START
12309 CharPtr ncrnaClassList[] = {
12310 "antisense_RNA",
12311 "autocatalytically_spliced_intron",
12312 "hammerhead_ribozyme",
12313 "ribozyme",
12314 "RNase_P_RNA",
12315 "RNase_MRP_RNA",
12316 "telomerase_RNA",
12317 "guide_RNA",
12318 "rasiRNA",
12319 "scRNA",
12320 "siRNA",
12321 "miRNA",
12322 "piRNA",
12323 "snoRNA",
12324 "snRNA",
12325 "SRP_RNA",
12326 "vault_RNA",
12327 "Y_RNA",
12328 "lncRNA",
12329 "other",
12330 NULL};
12331 
12332 Int4 NcrnaOTHER = sizeof (ncrnaClassList) / sizeof (CharPtr) - 1;
12333 
IsStringInNcRNAClassList(CharPtr str)12334 extern Boolean IsStringInNcRNAClassList (CharPtr str)
12335 {
12336   CharPtr PNTR p;
12337 
12338   if (StringHasNoText (str)) return FALSE;
12339   for (p = ncrnaClassList; *p != NULL; p++)
12340   {
12341     if (StringICmp (str, *p) == 0)
12342     {
12343       return TRUE;
12344     }
12345   }
12346   return FALSE;
12347 }
12348 
12349 
12350 CharPtr regulatoryClassList[] = {
12351 "attenuator",
12352 "CAAT_signal",
12353 "DNase_I_hypersensitive_site",
12354 "enhancer_blocking_element",
12355 "enhancer",
12356 "GC_signal",
12357 "imprinting_control_region",
12358 "insulator",
12359 "locus_control_region",
12360 "matrix_attachment_region",
12361 "minus_10_signal",
12362 "minus_35_signal",
12363 "polyA_signal_sequence",
12364 "promoter",
12365 "recoding_stimulatory_region",
12366 "replication_regulatory_region",
12367 "response_element",
12368 "ribosome_binding_site",
12369 "riboswitch",
12370 "silencer",
12371 "TATA_box",
12372 "terminator",
12373 "transcriptional_cis_regulatory_region",
12374 "other",
12375 NULL};
12376 
12377 Int4 RegulatoryOTHER = sizeof (regulatoryClassList) / sizeof (CharPtr) - 1;
12378 
IsStringInRegulatoryClassList(CharPtr str)12379 extern Boolean IsStringInRegulatoryClassList (CharPtr str)
12380 
12381 {
12382   CharPtr PNTR p;
12383 
12384   if (StringHasNoText (str)) return FALSE;
12385   for (p = regulatoryClassList; *p != NULL; p++)
12386   {
12387     if (StringICmp (str, *p) == 0)
12388     {
12389       return TRUE;
12390     }
12391   }
12392   return FALSE;
12393 }
12394 
12395 CharPtr recombinationClassList[] = {
12396 "chromosome_breakpoint",
12397 "meiotic_recombination",
12398 "mitotic_recombination",
12399 "non_allelic_homologous_recombination",
12400 "other",
12401 NULL};
12402 
IsStringInRecombinationClassList(CharPtr str)12403 extern Boolean IsStringInRecombinationClassList (CharPtr str)
12404 
12405 {
12406   CharPtr PNTR p;
12407 
12408   if (StringHasNoText (str)) return FALSE;
12409   for (p = recombinationClassList; *p != NULL; p++)
12410   {
12411     if (StringICmp (str, *p) == 0)
12412     {
12413       return TRUE;
12414     }
12415   }
12416   return FALSE;
12417 }
12418 //LCOV_EXCL_STOP
12419 
/*
 * Adds a (qual, class_val) qualifier to the front of the feature's GBQual
 * list unless a qualifier with exactly that name and value is already
 * present (both compared case-sensitively).
 *
 * Does nothing when sfp is NULL, when qual or class_val has no text, or
 * when the new qualifier cannot be allocated.
 */
static void AddNonCopiedQual (SeqFeatPtr sfp, CharPtr qual, CharPtr class_val)
{
  GBQualPtr gbq;

  if (sfp == NULL || StringHasNoText (qual) || StringHasNoText (class_val))
  {
    return;
  }

  /* already present with the same value: nothing to add */
  for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next)
  {
    if (StringCmp (gbq->qual, qual) == 0
        && StringCmp (gbq->val, class_val) == 0)
    {
      return;
    }
  }

  gbq = GBQualNew ();
  /* allocation failed: leave the list alone rather than dereference NULL */
  if (gbq == NULL)
  {
    return;
  }
  gbq->qual = StringSave (qual);
  gbq->val = StringSave (class_val);
  gbq->next = sfp->qual;
  sfp->qual = gbq;
}
12445 
12446 
/*
 * GetMiRNAProduct
 *
 * Extracts a product name from an RNA name that is decorated with
 * "miRNA" / "microRNA":
 *   - "miRNA xyz" or "microRNA xyz"   -> "xyz"
 *   - "xyz miRNA" or "xyz microRNA"   -> "xyz", except when the string ends
 *     with "precursor miRNA" / "precursor microRNA", which is left alone.
 *
 * Returns a newly allocated string that the caller must free, or NULL when
 * str has no text, matches none of the patterns, or memory cannot be
 * allocated.
 */
static CharPtr GetMiRNAProduct (CharPtr str)
{
  Int4    len;
  Int4    keep = 0;      /* number of leading characters to retain */
  CharPtr product = NULL;

  if (StringHasNoText (str)) return NULL;

  /* prefix forms: everything after the prefix is the product */
  if (StringNCmp (str, "miRNA ", 6) == 0)
  {
    return StringSave (str + 6);
  }
  if (StringNCmp (str, "microRNA ", 9) == 0)
  {
    return StringSave (str + 9);
  }

  /* suffix forms: everything before the suffix is the product */
  len = StringLen (str);
  if (len > 6 && StringCmp (str + len - 6, " miRNA") == 0
      && (len < 15 || StringCmp (str + len - 15, "precursor miRNA") != 0))
  {
    keep = len - 6;
  }
  else if (len > 9 && StringCmp (str + len - 9, " microRNA") == 0
           && (len < 18 || StringCmp (str + len - 18, "precursor microRNA") != 0))
  {
    keep = len - 9;
  }

  if (keep > 0)
  {
    product = (CharPtr) MemNew (sizeof (Char) * (keep + 1));
    if (product != NULL)
    {
      /* only write the terminator once the buffer is known to exist */
      StringNCpy (product, str, keep);
      product[keep] = 0;
    }
  }
  return product;
}
12481 
12482 
ConvertToNcRNA(SeqFeatPtr sfp)12483 static Boolean ConvertToNcRNA (SeqFeatPtr sfp)
12484 {
12485   GBQualPtr gbq;
12486   RnaRefPtr rrp;
12487   Boolean was_converted = FALSE;
12488   CharPtr miRNAproduct = NULL;
12489 
12490   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL)
12491   {
12492     return FALSE;
12493   }
12494   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
12495   if (rrp->type == 5 || rrp->type == 6 || rrp->type == 7)
12496   {
12497     if (rrp->type == 5)
12498     {
12499       AddNonCopiedQual (sfp, "ncRNA_class", "snRNA");
12500     }
12501     else if (rrp->type == 6)
12502     {
12503       AddNonCopiedQual (sfp, "ncRNA_class", "scRNA");
12504     }
12505     else if (rrp->type == 7)
12506     {
12507       AddNonCopiedQual (sfp, "ncRNA_class", "snoRNA");
12508     }
12509     if (rrp->ext.choice == 1)
12510     {
12511       AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
12512       rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12513     }
12514     rrp->ext.choice = 1;
12515     rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12516     rrp->type = 255;
12517     was_converted = TRUE;
12518   }
12519   else if (rrp->type == 255 && rrp->ext.choice == 1)
12520   {
12521     if (IsStringInNcRNAClassList (rrp->ext.value.ptrvalue))
12522     {
12523       AddNonCopiedQual (sfp, "ncRNA_class", rrp->ext.value.ptrvalue);
12524       rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12525       rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12526       was_converted = TRUE;
12527     }
12528     else if ((miRNAproduct = GetMiRNAProduct (rrp->ext.value.ptrvalue)) != NULL)
12529     {
12530       AddNonCopiedQual (sfp, "ncRNA_class", "miRNA");
12531       rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12532       rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12533       AddNonCopiedQual (sfp, "product", miRNAproduct);
12534       miRNAproduct = MemFree (miRNAproduct);
12535       was_converted = TRUE;
12536     }
12537     else if (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
12538              && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
12539              && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0)
12540     {
12541       AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
12542       rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12543       rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
12544       was_converted = TRUE;
12545     }
12546   }
12547   if (rrp->type == 255 && rrp->ext.choice == 0) {
12548     rrp->ext.choice = 1;
12549     rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
12550   }
12551   if (rrp->type == 255 && rrp->ext.choice == 1 &&
12552       StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0) {
12553     for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
12554       if (StringCmp (gbq->qual, "ncRNA_class") == 0) {
12555         rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12556         rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12557         was_converted = TRUE;
12558       } else if (StringCmp (gbq->qual, "tag_peptide") == 0) {
12559         rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12560         rrp->ext.value.ptrvalue = StringSave ("tmRNA");
12561         was_converted = TRUE;
12562       }
12563     }
12564   }
12565   return was_converted;
12566 }
12567 
/*
 * ModernizeFeatureStrings
 *
 * Normalizes well-known spellings inside a feature:
 *   - protein features: when isEmblOrDdbj is TRUE, the many variants of
 *     the rubisco large/small subunit names are replaced by the canonical
 *     "ribulose-1,5-bisphosphate carboxylase/oxygenase ... subunit"
 *     spelling, and a description that merely repeats the short name is
 *     removed; the name list is always passed to CleanVisStringList;
 *   - RNA features of type 255 named "misc_RNA": product qualifiers that
 *     are variants of ITS1/ITS2/ITS3 are replaced by
 *     "internal transcribed spacer N".
 *
 * Features that are NULL, that do not keep a pointer in data.value, or
 * whose data pointer is NULL are left untouched.
 */
static void ModernizeFeatureStrings (SeqFeatPtr sfp, Boolean isEmblOrDdbj)

{
  GBQualPtr    gbq;
  CharPtr      name;
  ProtRefPtr   prp;
  RnaRefPtr    rrp;
  CharPtr      str;
  ValNodePtr   vnp;

  if (sfp == NULL) return;

  /* skip feature types that do not use data.value.ptrvalue */
  switch (sfp->data.choice) {
    case SEQFEAT_COMMENT:
    case SEQFEAT_BOND:
    case SEQFEAT_SITE:
    case SEQFEAT_PSEC_STR:
      return;
    default:
      break;
  }

  if (sfp->data.value.ptrvalue == NULL) return;

  switch (sfp->data.choice) {
    case SEQFEAT_PROT:
      prp = (ProtRefPtr) sfp->data.value.ptrvalue;
      if (! isEmblOrDdbj) {
        CleanVisStringList (&(prp->name));
        break;
      }
      for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        /*
         * prp->desc is read directly on every pass: it may already have
         * been freed (and set to NULL) for an earlier name in this list,
         * so a pointer cached before the loop would be stale.
         */
        if (StringICmp (str, "RbcL") == 0 || StringICmp (str, "rubisco large subunit") == 0) {
          vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
          str = MemFree (str);
          if (StringICmp (prp->desc, "RbcL") == 0 || StringICmp (prp->desc, "rubisco large subunit") == 0) {
            prp->desc = MemFree (prp->desc);
          }
        } else if (StringICmp (str, "RbcS") == 0 || StringICmp (str, "rubisco small subunit") == 0) {
          vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit");
          str = MemFree (str);
          if (StringICmp (prp->desc, "RbcS") == 0 || StringICmp (prp->desc, "rubisco small subunit") == 0) {
            prp->desc = MemFree (prp->desc);
          }
        }
        /* str holds the MemFree result here if the name was just replaced above */
        if (StringStr (str, "ribulose") != NULL &&
            StringStr (str, "bisphosphate") != NULL &&
            StringStr (str, "methyltransferase") == NULL &&
            StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit") != 0 &&
            StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit") != 0) {
          if (StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose 1,5-bisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose bisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose-bisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose-1,5-bisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose-1,5-bisphosphate carboxylase, large subunit") == 0 ||
              StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
              StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose bisphosphate carboxylase large chain") == 0 ||
              StringICmp (str, "ribulose 1,5-bisphosphate carboxylase-oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose bisphosphate carboxylase oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose 1,5 bisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase, large subunit") == 0 ||
              StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxgenase") == 0 ||
              StringICmp (str, "ribulose bisphosphate carboxylase/oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase, large subunit") == 0 ||
              StringICmp (str, "ribulose 5-bisphosphate carboxylase, large subunit") == 0 ||
              StringICmp (str, "ribulosebisphosphate carboxylase large subunit") == 0 ||
              StringICmp (str, "ribulose bisphosphate large subunit") == 0 ||
              StringICmp (str, "ribulose 1,5 bisphosphate carboxylase/oxygenase large subunit") == 0 ||
              StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large chain") == 0 ||
              StringICmp (str, "large subunit ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
              StringICmp (str, "ribulose-bisphosphate carboxylase, large subunit") == 0 ||
              StringICmp (str, "ribulose-1, 5-bisphosphate carboxylase/oxygenase large-subunit") == 0) {
            vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
            str = MemFree (str);
          }
        }
      }
      CleanVisStringList (&(prp->name));
      break;
    case SEQFEAT_RNA :
      rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
      if (rrp->type == 255 && rrp->ext.choice == 1) {
        name = (CharPtr) rrp->ext.value.ptrvalue;
        if (StringCmp (name, "misc_RNA") == 0) {
          /* canonicalize ITS spellings found in product qualifiers */
          for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
            if (StringCmp (gbq->qual, "product") != 0) continue;
            name = gbq->val;
            if (StringHasNoText (name)) continue;
            if (StringICmp (name, "its1") == 0 || StringICmp (name, "its 1") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 1");
            } else if (StringICmp (name, "its2") == 0 || StringICmp (name, "its 2") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 2");
            } else if (StringICmp (name, "its3") == 0 || StringICmp (name, "its 3") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 3");
            } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 1") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 1");
            } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 2") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 2");
            } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 3") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 3");
            } else if (StringICmp (name, "internal transcribed spacer 1 (ITS1)") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 1");
            } else if (StringICmp (name, "internal transcribed spacer 2 (ITS2)") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 2");
            } else if (StringICmp (name, "internal transcribed spacer 3 (ITS3)") == 0) {
              gbq->val = MemFree (gbq->val);
              gbq->val = StringSave ("internal transcribed spacer 3");
            }
          }
        }
      }
      break;
    default:
      break;
  }
}
12703 
IsFeatureCommentRedundant(SeqFeatPtr sfp)12704 static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp)
12705 
12706 {
12707   Uint1            aa;
12708   Choice           cbaa;
12709   CodeBreakPtr     cbp;
12710   CharPtr          comment;
12711   CdRegionPtr      crp;
12712   SeqFeatPtr       feat;
12713   Uint1            from;
12714   GBQualPtr        gbq;
12715   GeneRefPtr       grp;
12716   CharPtr          name;
12717   BioseqPtr        prod;
12718   ProtRefPtr       prp;
12719   Uint1            residue;
12720   RNAGenPtr        rgp;
12721   RNAQualPtr       rqp;
12722   RnaRefPtr        rrp;
12723   SeqAnnotPtr      sap;
12724   SeqCodeTablePtr  sctp;
12725   Uint1            seqcode;
12726   SeqIdPtr         sip;
12727   SeqMapTablePtr   smtp;
12728   CharPtr          str;
12729   tRNAPtr          trp;
12730   ValNodePtr       vnp;
12731 
12732   if (sfp == NULL) return FALSE;
12733   comment = sfp->comment;
12734   if (StringHasNoText (comment)) return FALSE;
12735 
12736   if (sfp->excpt && StringDoesHaveText (sfp->except_text)) {
12737     if (StringCmp (comment, sfp->except_text) == 0) return TRUE;
12738   }
12739 
12740   /* skip feature types that do not use data.value.ptrvalue */
12741   switch (sfp->data.choice) {
12742     case SEQFEAT_COMMENT:
12743     case SEQFEAT_BOND:
12744     case SEQFEAT_SITE:
12745     case SEQFEAT_PSEC_STR:
12746       return FALSE;
12747     default:
12748       break;
12749   }
12750 
12751   if (sfp->data.value.ptrvalue == NULL) return FALSE;
12752 
12753   switch (sfp->data.choice) {
12754     case SEQFEAT_GENE:
12755       grp = (GeneRefPtr) sfp->data.value.ptrvalue;
12756       /*
12757       if (StringCmp (comment, grp->locus) == 0) return TRUE;
12758       if (StringCmp (comment, grp->desc) == 0) return TRUE;
12759       */
12760       if (StringCmp (comment, grp->locus_tag) == 0) return TRUE;
12761       for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
12762         str = (CharPtr) vnp->data.ptrvalue;
12763         if (StringHasNoText (str)) continue;
12764         if (StringCmp (comment, str) == 0) return TRUE;
12765       }
12766       break;
12767     case SEQFEAT_CDREGION:
12768       crp = (CdRegionPtr) sfp->data.value.ptrvalue;
12769       for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
12770         seqcode = 0;
12771         sctp = NULL;
12772         cbaa = cbp->aa;
12773         switch (cbaa.choice) {
12774           case 1 :
12775             seqcode = Seq_code_ncbieaa;
12776             break;
12777           case 2 :
12778             seqcode = Seq_code_ncbi8aa;
12779             break;
12780           case 3 :
12781             seqcode = Seq_code_ncbistdaa;
12782             break;
12783           default :
12784             break;
12785         }
12786         if (seqcode != 0) {
12787           sctp = SeqCodeTableFind (seqcode);
12788           if (sctp != NULL) {
12789             residue = cbaa.value.intvalue;
12790             if (residue != 42) {
12791               if (seqcode != Seq_code_ncbieaa) {
12792                 smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa);
12793                 residue = SeqMapTableConvert (smtp, residue);
12794               }
12795               if (residue == 'U') {
12796                 if (StringCmp (comment, "selenocysteine") == 0) return TRUE;
12797               } else if (residue == 'O') {
12798                 if (StringCmp (comment, "pyrrolysine") == 0) return TRUE;
12799               }
12800             }
12801           }
12802         }
12803       }
12804       if (sfp->product != NULL) {
12805         sip = SeqLocId (sfp->product);
12806         if (sip != NULL) {
12807           prod = BioseqFind (sip);
12808           if (prod != NULL) {
12809             for (sap = prod->annot; sap != NULL; sap = sap->next) {
12810               if (sap->type != 1) continue;
12811               for (feat = (SeqFeatPtr) sap->data; feat != NULL; feat = feat->next) {
12812                 if (feat->data.choice != SEQFEAT_PROT) continue;
12813                 prp = (ProtRefPtr) feat->data.value.ptrvalue;
12814                 if (prp == NULL) continue;
12815                 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
12816                   str = (CharPtr) vnp->data.ptrvalue;
12817                   if (StringHasNoText (str)) continue;
12818                   if (StringCmp (comment, str) == 0) return TRUE;
12819                 }
12820               }
12821             }
12822           }
12823         }
12824       }
12825       break;
12826     case SEQFEAT_PROT:
12827       prp = (ProtRefPtr) sfp->data.value.ptrvalue;
12828       for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
12829         str = (CharPtr) vnp->data.ptrvalue;
12830         if (StringHasNoText (str)) continue;
12831         if (StringCmp (comment, str) == 0) return TRUE;
12832       }
12833       if (StringDoesHaveText (prp->desc)) {
12834         if (StringCmp (comment, prp->desc) == 0) return TRUE;
12835       }
12836       for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
12837         str = (CharPtr) vnp->data.ptrvalue;
12838         if (StringHasNoText (str)) continue;
12839         if (StringCmp (comment, str) == 0) return TRUE;
12840       }
12841       break;
12842     case SEQFEAT_RNA :
12843       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
12844       if (rrp->type == 255 && rrp->ext.choice == 1) {
12845         name = (CharPtr) rrp->ext.value.ptrvalue;
12846         if (StringCmp (name, "misc_RNA") == 0) {
12847           for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
12848             if (StringCmp (gbq->qual, "product") != 0) continue;
12849             name = gbq->val;
12850             if (StringHasNoText (name)) continue;
12851             /*
12852             if (StringICmp (name, "internal transcribed spacer 1") == 0) {
12853               if (StringICmp (comment, "its1") == 0 || StringICmp (comment, "its 1") == 0) return TRUE;
12854             } else if (StringICmp (name, "internal transcribed spacer 2") == 0) {
12855               if (StringICmp (comment, "its2") == 0 || StringICmp (comment, "its 2") == 0) return TRUE;
12856             } else if (StringICmp (name, "internal transcribed spacer 3") == 0) {
12857               if (StringICmp (comment, "its3") == 0 || StringICmp (comment, "its 3") == 0) return TRUE;
12858             }
12859             */
12860           }
12861         }
12862       } else if (rrp->type == 3 && rrp->ext.choice == 2) {
12863         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
12864         if (trp != NULL) {
12865           aa = 0;
12866           if (trp->aatype == 2) {
12867             aa = trp->aa;
12868           } else {
12869             from = 0;
12870             switch (trp->aatype) {
12871               case 0 :
12872                 from = 0;
12873                 break;
12874               case 1 :
12875                 from = Seq_code_iupacaa;
12876                 break;
12877               case 2 :
12878                 from = Seq_code_ncbieaa;
12879                 break;
12880               case 3 :
12881                 from = Seq_code_ncbi8aa;
12882                 break;
12883               case 4 :
12884                 from = Seq_code_ncbistdaa;
12885                 break;
12886               default:
12887                 break;
12888             }
12889             seqcode = Seq_code_ncbieaa;
12890             smtp = SeqMapTableFind (seqcode, from);
12891             if (smtp != NULL) {
12892               aa = SeqMapTableConvert (smtp, trp->aa);
12893               if (aa == 255 && from == Seq_code_iupacaa) {
12894                 if (trp->aa == 'U') {
12895                   aa = 'U';
12896                 } else if (trp->aa == 'O') {
12897                   aa = 'O';
12898                 }
12899               }
12900             }
12901           }
12902           if (aa > 0 && aa != 255) {
12903             if (StringNCmp (comment, "aa: ", 4) == 0) {
12904               comment += 4;
12905             }
12906             residue = FindTrnaAA3 (comment);
12907             if (residue == aa) {
12908               if (aa == 'M' && StringICmp ("fMet", comment) == 0) return FALSE;
12909               if (aa == 'M' && StringICmp ("iMet", comment) == 0) return FALSE;
12910               return TRUE;
12911             }
12912             residue = FindTrnaAA (comment);
12913             if (residue == aa) return TRUE;
12914           }
12915         }
12916       } else if (rrp->ext.choice == 3) {
12917         rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
12918         if (rgp != NULL) {
12919           if (StringCmp (comment, rgp->product) == 0) return TRUE;
12920           if (StringCmp (comment, rgp->_class) == 0) return TRUE;
12921           for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
12922             if (StringCmp (comment, rqp->val) == 0) return TRUE;
12923           }
12924         }
12925       }
12926       break;
12927     default:
12928       break;
12929   }
12930 
12931   return FALSE;
12932 }
12933 
12934 
/*
 * ExtractSatelliteFromComment
 *
 * Recognizes a comment that starts with "microsatellite", "minisatellite"
 * or "satellite".  When that word is the entire comment, or is directly
 * followed by ';', a newly allocated copy of the word is returned and the
 * word (plus the ';') is removed from comment in place.  In any other case
 * NULL is returned.
 *
 * After a prefix has been recognized, a single leading '~' that is not
 * followed by another '~' is also stripped from comment.
 */
static CharPtr ExtractSatelliteFromComment (CharPtr comment)
{
  static CharPtr  prefixes [] = {
    "microsatellite", "minisatellite", "satellite", NULL
  };
  Char     after;
  Int4     k;
  CharPtr  kind = NULL;
  Int4     kind_len;
  CharPtr  result = NULL;

  if (StringHasNoText (comment)) {
    return NULL;
  }

  /* first prefix that matches wins */
  for (k = 0; prefixes [k] != NULL && kind == NULL; k++) {
    if (StringNCmp (comment, prefixes [k], StringLen (prefixes [k])) == 0) {
      kind = prefixes [k];
    }
  }
  if (kind == NULL) {
    return NULL;
  }

  kind_len = StringLen (kind);
  after = comment [kind_len];
  if (after == '\0' || after == ';') {
    result = StringSave (kind);
    if (after == '\0') {
      /* the comment was nothing but the satellite word */
      *comment = 0;
    } else {
      /* blank out the word and the ';', then let the trim remove them */
      for (k = kind_len; k >= 0; k--) {
        comment [k] = ' ';
      }
      TrimSpacesAroundString (comment);
    }
  }

  if (comment != NULL && comment [0] == '~' && comment [1] != '~') {
    comment [0] = ' ';
    TrimSpacesAroundString (comment);
  }

  return result;
}
12978 
/*
 * DoModernizeRNAFields
 *
 * Tidies the RnaRef extension of an RNA feature after ModernizeRNAFields
 * has run:
 *   1. a type 10 RnaRef with a string name gets that string wrapped in a
 *      new RNAGen as its product;
 *   2. RNAGen qualifiers with an empty name or value are unlinked and freed;
 *   3. for type 10 with a product but no class, a product of the form
 *      "<ncRNA class> <rest>" is split into class and product, the RnaRef
 *      becomes type 8 and the feature subtype FEATDEF_ncRNA;
 *   4. when no qualifiers remain: types 2 and 4 with only a product are
 *      turned back into a plain string name, and an RNAGen with neither
 *      class nor product is removed altogether.
 *
 * Does nothing for a NULL or non-RNA feature.
 */
static void DoModernizeRNAFields (SeqFeatPtr sfp)

{
  RNAQualSetPtr       nextrqp;
  RNAQualSetPtr PNTR  prevrqp;
  RNAGenPtr           rgp;
  RNAQualSetPtr       rqp;
  RnaRefPtr           rrp;
  CharPtr             str;
  Boolean             unlink;
  Int2                i;
  size_t              len;
  CharPtr             ncclass;
  CharPtr             product;
  CharPtr             tmp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;

  ModernizeRNAFields (sfp);
  rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rrp == NULL) return;

  if (rrp->ext.choice == 1 && rrp->type == 10) {
    str = rrp->ext.value.ptrvalue;
    if (StringHasNoText (str)) return;

    rgp = (RNAGenPtr) MemNew (sizeof (RNAGen));
    if (rgp == NULL) return;
    /* the RNAGen takes over ownership of the existing name string */
    rrp->ext.choice = 3;
    rrp->ext.value.ptrvalue = (Pointer) rgp;
    rgp->product = str;
  }

  if (rrp->ext.choice != 3) return;

  rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
  if (rgp == NULL) return;

  /* drop qualifiers that lack a name or a value */
  rqp = rgp->quals;
  prevrqp = (RNAQualSetPtr PNTR) &(rgp->quals);
  while (rqp != NULL) {
    nextrqp = rqp->next;
    unlink = FALSE;
    if (StringHasNoText (rqp->qual) || StringHasNoText (rqp->val)) {
      unlink = TRUE;
    }
    if (unlink) {
      *(prevrqp) = rqp->next;
      rqp->next = NULL;
      RNAQualFree (rqp);
    } else {
      prevrqp = (RNAQualSetPtr PNTR) &(rqp->next);
    }
    rqp = nextrqp;
  }

  if (rrp->type == 10 && StringDoesHaveText (rgp->product) && rgp->_class == NULL) {
    ncclass = rgp->product;
    for (i = 0; ncrnaClassList [i] != NULL; i++) {
      str = ncrnaClassList [i];
      if (StringHasNoText (str)) continue;
      len = StringLen (str);
      if (len < 1) continue;
      if (StringNICmp (ncclass, str, len) != 0) continue;
      if (ncclass [len] != ' ') continue;
      tmp = ncclass + len + 1;
      if (StringHasNoText (tmp)) continue;
      /* cut the product in place: class before the blank, product after */
      ncclass [len] = '\0';
      rgp->_class = StringSave (ncclass);
      product = StringSave (tmp);
      rgp->product = MemFree (rgp->product);
      rgp->product = product;
      TrimSpacesAroundString (rgp->_class);
      TrimSpacesAroundString (rgp->product);
      rrp->type = 8;
      sfp->idx.subtype = FEATDEF_ncRNA;
      /*
       * ncclass pointed at the buffer that was just freed, so the scan
       * must stop here; continuing would read freed memory.
       */
      break;
    }
  }

  if (rgp->quals != NULL) return;

  if (rrp->type == 2 || rrp->type == 4) {
    if (StringDoesHaveText (rgp->product) && StringHasNoText (rgp->_class)) {
      /* a product alone fits in the simple string form of the extension */
      str = StringSave (rgp->product);
      rrp->ext.choice = 1;
      rrp->ext.value.ptrvalue = (Pointer) str;
      RNAGenFree (rgp);
      return;
    }
  }

  if (StringDoesHaveText (rgp->_class) || StringDoesHaveText (rgp->product)) return;

  /* nothing left in the RNAGen - remove the extension */
  rrp->ext.value.ptrvalue = NULL;
  rrp->ext.choice = 0;
  RNAGenFree (rgp);
}
13076 
13077 
/*
 * FixncRNAClass
 *
 * For a feature whose subtype is FEATDEF_ncRNA and whose RnaRef carries an
 * RNAGen extension (ext.choice 3), replaces a class of "antisense"
 * (compared without regard to case) by "antisense_RNA".  Any other feature
 * is left unchanged.
 */
static void FixncRNAClass (SeqFeatPtr sfp)
{
  RNAGenPtr  gen;
  RnaRefPtr  rna;

  if (sfp == NULL) return;
  if (sfp->idx.subtype != FEATDEF_ncRNA) return;

  rna = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rna == NULL) return;
  if (rna->ext.choice != 3) return;

  gen = (RNAGenPtr) rna->ext.value.ptrvalue;
  if (gen == NULL) return;

  if (StringICmp (gen->_class, "antisense") != 0) return;

  gen->_class = MemFree (gen->_class);
  gen->_class = StringSave ("antisense_RNA");
}
13096 
13097 
/*
 * MoveBioSourceFeatureNoteToSubSourceNote
 *
 * For a BioSource feature with a non-empty comment, hands the comment to
 * SetSourceQualInBioSource for the subsource-note text qualifier (using
 * ExistingTextOption_append_semi) and then frees and clears sfp->comment.
 * Other features are left unchanged.
 */
static void MoveBioSourceFeatureNoteToSubSourceNote (SeqFeatPtr sfp)
{
  ValNode  field;

  if (sfp == NULL) return;
  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
  if (StringHasNoText (sfp->comment)) return;

  /* describe the target qualifier: text qualifier "subsource note" */
  MemSet (&field, 0, sizeof (field));
  field.choice = SourceQualChoice_textqual;
  field.data.intvalue = Source_qual_subsource_note;

  SetSourceQualInBioSource (sfp->data.value.ptrvalue, &field, NULL,
                            sfp->comment, ExistingTextOption_append_semi);

  sfp->comment = MemFree (sfp->comment);
}
13113 
13114 
ConsolidateOneLikeSubSourceModifier(SubSourcePtr match_to,Boolean use_semicolon)13115 NLM_EXTERN void ConsolidateOneLikeSubSourceModifier (
13116   SubSourcePtr match_to,
13117   Boolean use_semicolon
13118 )
13119 {
13120   SubSourcePtr prev, index;
13121   Int4         len, num_matches;
13122   CharPtr      new_value;
13123 
13124   if (match_to == NULL) return;
13125   len = StringLen (match_to->name) + 1;
13126   num_matches = 0;
13127   prev = match_to;
13128   index = match_to->next;
13129   while (index != NULL)
13130   {
13131     if (index->subtype == match_to->subtype && index->name != NULL)
13132     {
13133       len += StringLen (index->name) + 2;
13134       num_matches++;
13135     }
13136     index = index->next;
13137   }
13138   if (num_matches == 0) return;
13139 
13140   new_value = MemNew (len * sizeof (char));
13141   if (new_value == NULL) return;
13142 
13143   StringCpy (new_value, match_to->name);
13144   index = match_to->next;
13145   while (index != NULL)
13146   {
13147     if (index->subtype == match_to->subtype && index->name != NULL)
13148     {
13149       if (use_semicolon)
13150       {
13151         StringCat (new_value, "; ");
13152       }
13153       else
13154       {
13155         StringCat (new_value, " ");
13156       }
13157       StringCat (new_value, index->name);
13158       prev->next = index->next;
13159       index->next = NULL;
13160       SubSourceFree (index);
13161       index = prev;
13162     }
13163     prev = index;
13164     index = index->next;
13165   }
13166   MemFree (match_to->name);
13167   match_to->name = new_value;
13168 }
13169 
13170 
ConsolidateOneLikeOrganismModifier(OrgModPtr match_to,Boolean use_semicolon)13171 NLM_EXTERN void ConsolidateOneLikeOrganismModifier (
13172   OrgModPtr match_to,
13173   Boolean use_semicolon
13174 )
13175 {
13176   OrgModPtr prev, index;
13177   Int4      len, num_matches;
13178   CharPtr   new_value;
13179 
13180   if (match_to == NULL) return;
13181   len = StringLen (match_to->subname) + 1;
13182   num_matches = 0;
13183   prev = match_to;
13184   index = match_to->next;
13185   while (index != NULL)
13186   {
13187     if (index->subtype == match_to->subtype && index->subname != NULL)
13188     {
13189       len += StringLen (index->subname) + 2;
13190       num_matches++;
13191     }
13192     index = index->next;
13193   }
13194   if (num_matches == 0) return;
13195 
13196   new_value = MemNew (len * sizeof (char));
13197   if (new_value == NULL) return;
13198 
13199   StringCpy (new_value, match_to->subname);
13200   index = match_to->next;
13201   while (index != NULL)
13202   {
13203     if (index->subtype == match_to->subtype && index->subname != NULL)
13204     {
13205       if (use_semicolon)
13206       {
13207         StringCat (new_value, "; ");
13208       }
13209       else
13210       {
13211         StringCat (new_value, " ");
13212       }
13213       StringCat (new_value, index->subname);
13214       prev->next = index->next;
13215       index->next = NULL;
13216       OrgModFree (index);
13217       index = prev;
13218     }
13219     prev = index;
13220     index = index->next;
13221   }
13222   MemFree (match_to->subname);
13223   match_to->subname = new_value;
13224 }
13225 
/* Row type of the reg_feat_keys table: pairs a feature key with a class string. */
typedef struct reg_feat {
  CharPtr  feat_key;   /* feature key string, e.g. "TATA_signal" */
  CharPtr  reg_class;  /* class string paired with that key, e.g. "TATA_box" */
} RegFeatData, PNTR RegFeatPtr;
13230 
/*
 * Table pairing feature keys with class strings; the { NULL, NULL } row
 * terminates it.
 * NOTE(review): presumably used to turn these feature keys into
 * regulatory-class values - the code that reads the table is not visible
 * here; confirm against its users.
 */
static RegFeatData reg_feat_keys [] = {
  { "enhancer",     "enhancer"              },
  { "promoter",     "promoter"              },
  { "CAAT_signal",  "CAAT_signal"           },
  { "TATA_signal",  "TATA_box"              },
  { "-35_signal",   "minus_35_signal"       },
  { "-10_signal",   "minus_10_signal"       },
  { "GC_signal",    "GC_signal"             },
  { "RBS",          "ribosome_binding_site" },
  { "polyA_signal", "polyA_signal_sequence" },
  { "attenuator",   "attenuator"            },
  { "terminator",   "terminator"            },
  { "misc_signal",  "other"                 },
  { NULL,           NULL                    }
};
13246 
ConsolidateBioSourceNotes(BioSourcePtr biop)13247 NLM_EXTERN void ConsolidateBioSourceNotes (BioSourcePtr biop)
13248 {
13249   SubSourcePtr ssp, note_ssp;
13250   OrgModPtr    mod, note_mod;
13251 
13252   if (biop == NULL) return;
13253 
13254   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
13255   {
13256     if (ssp->subtype == 255 && ssp->name != NULL)
13257     {
13258       ConsolidateOneLikeSubSourceModifier (ssp, TRUE);
13259       note_ssp = ssp;
13260     }
13261   }
13262 
13263   if (biop->org == NULL || biop->org->orgname == NULL) return;
13264   for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next)
13265   {
13266     if (mod->subtype == 255 && mod->subname != NULL)
13267     {
13268       ConsolidateOneLikeOrganismModifier (mod, TRUE);
13269       note_mod = mod;
13270     }
13271   }
13272 }
13273 
13274 
CleanUpSeqFeat(SeqFeatPtr sfp,Boolean isEmblOrDdbj,Boolean isJscan,Boolean stripSerial,Boolean modernizeFeats,ValNodePtr PNTR publist)13275 NLM_EXTERN void CleanUpSeqFeat (
13276   SeqFeatPtr sfp,
13277   Boolean isEmblOrDdbj,
13278   Boolean isJscan,
13279   Boolean stripSerial,
13280   Boolean modernizeFeats,
13281   ValNodePtr PNTR publist
13282 )
13283 
13284 {
13285   BioseqPtr     bsp;
13286   CodeBreakPtr  cbp;
13287   CdRegionPtr   crp;
13288   GBQualPtr     gbq;
13289   Boolean       emptyRNA;
13290   IntFuzzPtr    fuzz;
13291   GeneRefPtr    grp;
13292   Boolean       hasGibbsq;
13293   Boolean       hasNulls;
13294   SeqIdPtr      id;
13295   ImpFeatPtr    ifp;
13296   Int2          j;
13297   MolInfoPtr    mip;
13298   CharPtr       name;
13299   CharPtr       note;
13300   Boolean       partial5;
13301   Boolean       partial3;
13302   SeqPntPtr     pntp;
13303   Uint1         processed;
13304   ProtRefPtr    prp;
13305   ValNodePtr    psp;
13306   RNAGenPtr     rgp;
13307   RNAQualPtr    rqp;
13308   RnaRefPtr     rrp;
13309   Uint1         rrptype;
13310   CharPtr       satellite_type;
13311   SeqDescrPtr   sdp;
13312   SeqIntPtr     sintp;
13313   SeqIdPtr      sip;
13314   SeqLocPtr     slp;
13315   CharPtr       str;
13316   Uint1         strand;
13317   Boolean       sync_mol_info;
13318   tRNAPtr       trp;
13319   SeqFeatXrefPtr  xref, next, PNTR prevlink;
13320 
13321   if (sfp == NULL) return;
13322   crp = NULL;
13323   if (sfp->data.choice == SEQFEAT_IMP) {
13324     ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13325     if (ifp != NULL) {
13326       if (ifp->loc != NULL) {
13327         str = StringStr (ifp->loc, "replace");
13328         if (str != NULL) {
13329           AddReplaceQual (sfp, str);
13330           ifp->loc = MemFree (ifp->loc);
13331         }
13332       }
13333       if (StringCmp (ifp->key, "CDS") == 0) {
13334         if (! isEmblOrDdbj) {
13335           sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13336           sfp->data.choice = SEQFEAT_CDREGION;
13337           crp = CdRegionNew ();
13338           sfp->data.value.ptrvalue = crp;
13339           sfp->idx.subtype = FEATDEF_CDS;
13340         }
13341       } else if (StringCmp (ifp->key, "allele") == 0 ||
13342                  StringCmp (ifp->key, "mutation") == 0) {
13343         ifp->key = MemFree (ifp->key);
13344         ifp->key = StringSave ("variation");
13345         sfp->idx.subtype = FEATDEF_variation;
13346       } else if (StringCmp (ifp->key, "Import") == 0 ||
13347                  StringCmp (ifp->key, "virion") == 0) {
13348         ifp->key = MemFree (ifp->key);
13349         ifp->key = StringSave ("misc_feature");
13350         sfp->idx.subtype = FEATDEF_misc_feature;
13351       } else if (StringCmp (ifp->key, "repeat_unit") == 0 ) {
13352         ifp->key = MemFree (ifp->key);
13353         ifp->key = StringSave ("repeat_region");
13354         sfp->idx.subtype = FEATDEF_repeat_region;
13355       } else if (StringCmp (ifp->key, "misc_bind") == 0) {
13356         ifp->key = MemFree (ifp->key);
13357         ifp->key = StringSave ("misc_binding");
13358         sfp->idx.subtype = FEATDEF_misc_binding;
13359       } else if (StringCmp (ifp->key, "satellite") == 0 && (! isEmblOrDdbj)) {
13360         ifp->key = MemFree (ifp->key);
13361         ifp->key = StringSave ("repeat_region");
13362         sfp->idx.subtype = FEATDEF_repeat_region;
13363         gbq = GBQualNew ();
13364         if (gbq != NULL) {
13365           gbq->qual = StringSave ("satellite");
13366           gbq->val = ExtractSatelliteFromComment (sfp->comment);
13367           if (gbq->val == NULL) {
13368             gbq->val = StringSave ("satellite");
13369           }
13370           gbq->next = sfp->qual;
13371           sfp->qual = gbq;
13372         }
13373       } else if (StringCmp (ifp->key, "LTR") == 0) {
13374         ifp->key = MemFree (ifp->key);
13375         ifp->key = StringSave ("repeat_region");
13376         sfp->idx.subtype = FEATDEF_repeat_region;
13377         gbq = GBQualNew ();
13378         if (gbq != NULL) {
13379           gbq->qual = StringSave ("rpt_type");
13380           gbq->val = StringSave ("long_terminal_repeat");
13381           gbq->next = sfp->qual;
13382           sfp->qual = gbq;
13383         }
13384       } else if (StringHasNoText (ifp->loc)) {
13385         rrptype = 0;
13386         if (StringCmp (ifp->key, "precursor_RNA") == 0) {
13387           rrptype = 1;
13388         } else if (StringCmp (ifp->key, "mRNA") == 0) {
13389           rrptype = 2;
13390         } else if (StringCmp (ifp->key, "tRNA") == 0) {
13391           rrptype = 3;
13392         } else if (StringCmp (ifp->key, "rRNA") == 0) {
13393           rrptype = 4;
13394         } else if (StringCmp (ifp->key, "snRNA") == 0) {
13395           rrptype = 5;
13396         } else if (StringCmp (ifp->key, "scRNA") == 0) {
13397           rrptype = 6;
13398         } else if (StringCmp (ifp->key, "snoRNA") == 0) {
13399           rrptype = 7;
13400         } else if (StringCmp (ifp->key, "misc_RNA") == 0) {
13401           rrptype = 255;
13402         }
13403         if (rrptype != 0) {
13404           sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13405           sfp->data.choice = SEQFEAT_RNA;
13406           rrp = RnaRefNew ();
13407           sfp->data.value.ptrvalue = rrp;
13408           rrp->type = rrptype;
13409           sfp->idx.subtype = FindFeatDefType (sfp);
13410         } else {
13411           processed = 0;
13412           if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) {
13413             processed = 1;
13414           } else if (StringCmp (ifp->key, "mat_peptide") == 0) {
13415             processed = 2;
13416           } else if (StringCmp (ifp->key, "sig_peptide") == 0) {
13417             processed = 3;
13418           } else if (StringCmp (ifp->key, "transit_peptide") == 0) {
13419             processed = 4;
13420           } else if (StringCmp (ifp->key, "propeptide") == 0 || StringCmp (ifp->key, "pro_peptide") == 0) {
13421             processed = 5;
13422           }
13423           if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
13424             bsp = BioseqFind (SeqLocId (sfp->location));
13425             if (bsp != NULL && ISA_aa (bsp->mol)) {
13426               sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13427               sfp->data.choice = SEQFEAT_PROT;
13428               prp = ProtRefNew ();
13429               sfp->data.value.ptrvalue = prp;
13430               prp->processed = processed;
13431               sfp->idx.subtype = FindFeatDefType (sfp);
13432             }
13433           }
13434         }
13435       }
13436       if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0 && (! isEmblOrDdbj)) {
13437         satellite_type = ExtractSatelliteFromComment (sfp->comment);
13438         if (satellite_type != NULL) {
13439           gbq = GBQualNew ();
13440           if (gbq != NULL) {
13441             gbq->qual = StringSave ("satellite");
13442             gbq->val = satellite_type;
13443             gbq->next = sfp->qual;
13444             sfp->qual = gbq;
13445           }
13446         }
13447       }
13448     }
13449   }
13450   if (crp != NULL && crp->frame == 0 && (! sfp->pseudo)) {
13451     crp->frame = GetFrameFromLoc (sfp->location);
13452   }
13453   if (sfp->data.choice == SEQFEAT_IMP) {
13454     ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13455     if (ifp != NULL) {
13456       for (j = 0; reg_feat_keys [j].feat_key != NULL; j++) {
13457         if (StringICmp (ifp->key, reg_feat_keys [j].feat_key) == 0) {
13458           ifp->key = MemFree (ifp->key);
13459           ifp->key = StringSave ("regulatory");
13460           sfp->idx.subtype = FEATDEF_regulatory;
13461           gbq = GBQualNew ();
13462           if (gbq != NULL) {
13463             gbq->qual = StringSave ("regulatory_class");
13464             gbq->val = StringSave (reg_feat_keys [j].reg_class);
13465             gbq->next = sfp->qual;
13466             sfp->qual = gbq;
13467           }
13468           break;
13469         }
13470       }
13471     }
13472   }
13473   if (sfp->data.choice == SEQFEAT_IMP) {
13474     ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13475     if (ifp != NULL && StringCmp (ifp->key, "regulatory") == 0) {
13476       note = NULL;
13477       for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
13478         if (StringCmp (gbq->qual, "regulatory_class") != 0) continue;
13479         str = StringChr (gbq->val, ':');
13480         if (str == NULL) continue;
13481         if (StringNCmp (gbq->val, "other:", 6) == 0) continue;
13482         *str = '\0';
13483         str++;
13484         TrimSpacesAroundString (str);
13485         if (StringHasNoText (str)) continue;
13486         note = str;
13487       }
13488       if (StringDoesHaveText (note)) {
13489         gbq = GBQualNew ();
13490         if (gbq != NULL) {
13491           gbq->qual = StringSave ("note");
13492           gbq->val = StringSave (note);
13493           gbq->next = sfp->qual;
13494           sfp->qual = gbq;
13495         }
13496       }
13497     }
13498   }
13499   if (sfp->data.choice == SEQFEAT_RNA) {
13500     rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
13501     if (rrp != NULL) {
13502       if (rrp->ext.choice == 1) {
13503         name = (CharPtr) rrp->ext.value.ptrvalue;
13504         if (StringHasNoText (name)) {
13505           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
13506           rrp->ext.choice = 0;
13507         }
13508       } else if (rrp->ext.choice == 2) {
13509         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
13510         if (trp != NULL) {
13511           if (trp->aatype == 0 && trp->aa == 0 && trp->anticodon == NULL) {
13512             emptyRNA = TRUE;
13513             for (j = 0; j < 6; j++) {
13514               if (trp->codon [j] != 255) {
13515                 emptyRNA = FALSE;
13516               }
13517             }
13518             if (emptyRNA) {
13519               rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
13520               rrp->ext.choice = 0;
13521             }
13522           }
13523         }
13524       } else if (rrp->ext.choice == 3) {
13525         rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
13526         if (rgp != NULL) {
13527           if (StringHasNoText (rgp->_class) && StringHasNoText (rgp->product)) {
13528             emptyRNA = TRUE;
13529             for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
13530               if (StringDoesHaveText (rqp->qual) && StringDoesHaveText (rqp->val)) {
13531                 emptyRNA = FALSE;
13532               }
13533             }
13534             if (emptyRNA) {
13535               rrp->ext.value.ptrvalue = RNAGenFree (rrp->ext.value.ptrvalue);
13536               rrp->ext.choice = 0;
13537             }
13538           }
13539         }
13540       }
13541     }
13542   }
13543   ModernizeFeatureGBQuals (sfp);
13544   sfp->qual = SortFeatureGBQuals (sfp->qual);
13545   CleanupDuplicateGBQuals (&(sfp->qual));
13546   CleanupFeatureGBQuals (sfp, isEmblOrDdbj);
13547   sfp->qual = SortIllegalGBQuals (sfp->qual);
13548   CleanupFeatureStrings (sfp, isJscan, isEmblOrDdbj, stripSerial, modernizeFeats, publist);
13549   FixOldDbxrefs (sfp->dbxref, isEmblOrDdbj);
13550   FixNumericDbxrefs (sfp->dbxref);
13551   sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);
13552   CleanupDuplicateDbxrefs (&(sfp->dbxref));
13553   CleanupObsoleteDbxrefs (&(sfp->dbxref));
13554   CleanupGoDbxrefs (sfp->dbxref);
13555   psp = sfp->cit;
13556   if (psp != NULL && psp->data.ptrvalue) {
13557     psp->data.ptrvalue = ValNodeSort ((ValNodePtr) psp->data.ptrvalue, SortCits);
13558     CleanupDuplicateCits ((ValNodePtr PNTR) &(psp->data.ptrvalue));
13559   }
13560   CleanUpSeqLoc (sfp->location);
13561   strand = SeqLocStrand (sfp->location);
13562   id = SeqLocId (sfp->location);
13563   if (sfp->data.choice == SEQFEAT_GENE) {
13564     grp = (GeneRefPtr) sfp->data.value.ptrvalue;
13565     if (grp != NULL) {
13566       if (grp->pseudo) {
13567         sfp->pseudo = TRUE;
13568         grp->pseudo = FALSE;
13569       }
13570     }
13571   }
13572   if (sfp->data.choice == SEQFEAT_CDREGION) {
13573     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
13574     if (crp != NULL) {
13575       crp->code_break = SortCodeBreaks (sfp, crp->code_break);
13576       CleanupDuplicatedCodeBreaks (&(crp->code_break));
13577       for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
13578         CleanUpSeqLoc (cbp->loc);
13579         if (strand == Seq_strand_minus && id != NULL) {
13580           slp = cbp->loc;
13581           if (slp != NULL && slp->choice == SEQLOC_INT) {
13582             sip = SeqLocId (slp);
13583             if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
13584               sintp = (SeqIntPtr) slp->data.ptrvalue;
13585               if (sintp != NULL) {
13586                 sintp->strand = Seq_strand_minus;
13587               }
13588             }
13589           }
13590         }
13591       }
13592     }
13593   }
13594   if (sfp->data.choice == SEQFEAT_RNA) {
13595     rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
13596     if (rrp != NULL) {
13597       if (rrp->pseudo) {
13598         sfp->pseudo = TRUE;
13599         rrp->pseudo = FALSE;
13600       }
13601     }
13602     if (rrp != NULL && rrp->ext.choice == 2) {
13603       trp = (tRNAPtr) rrp->ext.value.ptrvalue;
13604       if (trp != NULL && trp->anticodon != NULL) {
13605         CleanUpSeqLoc (trp->anticodon);
13606         if (strand == Seq_strand_minus && id != NULL) {
13607           slp = trp->anticodon;
13608           if (slp != NULL && slp->choice == SEQLOC_INT) {
13609             sip = SeqLocId (slp);
13610             if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
13611               sintp = (SeqIntPtr) slp->data.ptrvalue;
13612               if (sintp != NULL) {
13613                 sintp->strand = Seq_strand_minus;
13614               }
13615             }
13616           }
13617         }
13618       }
13619     }
13620     if (ConvertToNcRNA (sfp)) {
13621       sfp->idx.subtype = FindFeatDefType (sfp);
13622     }
13623     if (sfp->idx.subtype == FEATDEF_ncRNA) {
13624       FixncRNAClass (sfp);
13625     }
13626   }
13627   if (sfp->data.choice == SEQFEAT_PROT) {
13628     prp = (ProtRefPtr) sfp->data.value.ptrvalue;
13629     if (prp != NULL && sfp->partial) {
13630       CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
13631       if (! partial5 && ! partial3) {
13632         bsp = BioseqFind (SeqLocId (sfp->location));
13633         if (bsp != NULL && ISA_aa (bsp->mol)) {
13634           hasGibbsq = FALSE;
13635           for (sip = bsp->id; sip != NULL; sip = sip->next) {
13636             if (sip->choice == SEQID_GIBBSQ) {
13637               hasGibbsq = TRUE;
13638             }
13639           }
13640           if (hasGibbsq) {
13641             sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL);
13642             if (sdp != NULL && sdp->choice == Seq_descr_title) {
13643               str = (CharPtr) sdp->data.ptrvalue;
13644               if (StringDoesHaveText (str)) {
13645                 sync_mol_info = FALSE;
13646                 if (StringStr (str, "{N-terminal}") != NULL) {
13647                   partial3 = TRUE;
13648                   sync_mol_info = TRUE;
13649                 } else if (StringStr (str, "{C-terminal}") != NULL) {
13650                   partial5 = TRUE;
13651                   sync_mol_info = TRUE;
13652                 }
13653                 if (sync_mol_info) {
13654                   SetSeqLocPartial (sfp->location, partial5, partial3);
13655                   sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
13656                   if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
13657                     mip = (MolInfoPtr) sdp->data.ptrvalue;
13658                     if (mip != NULL) {
13659                       if (partial5 && partial3) {
13660                         mip->completeness = 5;
13661                       } else if (partial5) {
13662                         mip->completeness = 3;
13663                       } else if (partial3) {
13664                         mip->completeness = 4;
13665                       } else if (sfp->partial) {
13666                         mip->completeness = 2;
13667                       } else {
13668                         mip->completeness = 0;
13669                       }
13670                     }
13671                   }
13672                 }
13673               }
13674             }
13675           }
13676         }
13677       }
13678     }
13679   }
13680   if (sfp->data.choice == SEQFEAT_REGION ||
13681              sfp->data.choice == SEQFEAT_SITE ||
13682              sfp->data.choice == SEQFEAT_BOND ||
13683              sfp->data.choice == SEQFEAT_PROT) {
13684     bsp = BioseqFind (SeqLocId (sfp->location));
13685     if (bsp != NULL && ISA_aa (bsp->mol)) {
13686       slp = SeqLocFindNext (sfp->location, NULL);
13687       while (slp != NULL) {
13688         if (slp->choice == SEQLOC_INT) {
13689           sintp = (SeqIntPtr) slp->data.ptrvalue;
13690           if (sintp != NULL) {
13691             if (sintp->strand != Seq_strand_unknown) {
13692               sintp->strand = Seq_strand_unknown;
13693             }
13694           }
13695         } else if (slp->choice == SEQLOC_PNT) {
13696           pntp = (SeqPntPtr) slp->data.ptrvalue;
13697           if (pntp->strand != Seq_strand_unknown) {
13698             pntp->strand = Seq_strand_unknown;
13699           }
13700         }
13701         slp = SeqLocFindNext (sfp->location, slp);
13702       }
13703     }
13704   }
13705   if (sfp->data.choice == SEQFEAT_BIOSRC) {
13706     /* combine multiple orgmod or subsource note qualifiers */
13707     ConsolidateBioSourceNotes(sfp->data.value.ptrvalue);
13708     /* if a BioSource feature has a comment, move the comment to
13709      * a subsource note.
13710      */
13711     MoveBioSourceFeatureNoteToSubSourceNote(sfp);
13712   }
13713 
13714   ModernizeFeatureStrings (sfp, isEmblOrDdbj);
13715 
13716   if (sfp->data.choice == SEQFEAT_GENE) {
13717     if (modernizeFeats) {
13718       ModernizeGeneFields (sfp);
13719     }
13720   }
13721 
13722   if (sfp->data.choice == SEQFEAT_RNA) {
13723     if (modernizeFeats) {
13724       DoModernizeRNAFields (sfp);
13725     }
13726   }
13727 
13728   if (IsFeatureCommentRedundant (sfp)) {
13729     sfp->comment = MemFree (sfp->comment);
13730   }
13731 
13732   /* sort and unique gbquals again after recent processing */
13733   sfp->qual = SortFeatureGBQuals (sfp->qual);
13734   CleanupDuplicateGBQuals (&(sfp->qual));
13735   sfp->qual = SortIllegalGBQuals (sfp->qual);
13736 
13737   /* normalize Seq-point fuzz tl to tr and decrement position */
13738   slp = SeqLocFindNext (sfp->location, NULL);
13739   for (slp = SeqLocFindNext (sfp->location, NULL);
13740        slp != NULL;
13741        slp = SeqLocFindNext (sfp->location, slp)) {
13742     if (slp->choice != SEQLOC_PNT) continue;
13743     pntp = (SeqPntPtr) slp->data.ptrvalue;
13744     if (pntp == NULL) continue;
13745     fuzz = pntp->fuzz;
13746     if (fuzz == NULL) continue;
13747     if (fuzz->choice == 4 /* lim */ && fuzz->a == 4 /* tl */ && pntp->point > 0) {
13748       (pntp->point)--;
13749       fuzz->a = 3; /* tr */
13750     }
13751   }
13752 
13753   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
13754   hasNulls = LocationHasNullsBetween (sfp->location);
13755   sfp->partial = (sfp->partial || partial5 || partial3 || (hasNulls && ! isEmblOrDdbj));
13756 
13757   prevlink = (SeqFeatXrefPtr PNTR) &(sfp->xref);
13758   xref = sfp->xref;
13759   while (xref != NULL) {
13760     next = xref->next;
13761 
13762     if (xref->id.choice == 0 && xref->data.choice == 0) {
13763       *prevlink = xref->next;
13764       xref->next = NULL;
13765       MemFree (xref);
13766     } else {
13767       prevlink = (SeqFeatXrefPtr PNTR) &(xref->next);
13768     }
13769 
13770     xref = next;
13771   }
13772 }
13773 
13774 
/* Runs CleanUpSeqLoc on the graph's location.
 * Does nothing when the graph or its location is NULL.
 */
static void CleanUpSeqGraph (SeqGraphPtr sgp)

{
  if (sgp == NULL || sgp->loc == NULL) return;

  CleanUpSeqLoc (sgp->loc);
}
13783 
/* Unlinks and frees delta-sequence components of a delta Bioseq that are
 * literals (choice 2) with length 0, seq_data_type 1 and non-NULL seq_data.
 * Non-delta or NULL Bioseqs are left untouched.
 */
static void RemoveZeroLengthSeqLits (BioseqPtr bsp)
{
  DeltaSeqPtr  curr, following, last_kept;
  SeqLitPtr    lit;

  if (bsp == NULL) return;
  if (bsp->repr != Seq_repr_delta) return;

  last_kept = NULL;
  curr = (DeltaSeqPtr) bsp->seq_ext;
  while (curr != NULL) {
    /* remember the successor now; curr may be freed below */
    following = curr->next;

    lit = NULL;
    if (curr->choice == 2) {
      lit = (SeqLitPtr) (curr->data.ptrvalue);
    }

    if (lit == NULL || lit->length != 0
        || lit->seq_data_type != 1 || lit->seq_data == NULL) {
      /* component is kept */
      last_kept = curr;
    } else {
      /* splice the component out of the chain, then release it */
      if (last_kept != NULL) {
        last_kept->next = following;
      } else {
        bsp->seq_ext = following;
      }
      curr->next = NULL;
      DeltaSeqFree (curr);
    }

    curr = following;
  }
}
13810 
13811 /*
13812 static Boolean CleanUpObjId (ObjectIdPtr oip)
13813 
13814 {
13815   size_t   len;
13816   CharPtr  ptr;
13817   Boolean  rval = FALSE;
13818   long     val;
13819 
13820   if (oip == NULL) return FALSE;
13821   if (StringDoesHaveText (oip->str)) {
13822     if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
13823       TrimSpacesAroundString (oip->str);
13824       rval = TRUE;
13825     }
13826   }
13827   ptr = oip->str;
13828   if (ptr != NULL && *ptr != '0' && StringIsAllDigits(ptr)) {
13829     len = StringLen (ptr);
13830     if (len < 10 || (len == 10 && StringCmp (ptr, "2147483647") <= 0)) {
13831       if (sscanf (oip->str, "%ld", &val) == 1) {
13832         oip->id = (Int4) val;
13833         oip->str = MemFree (oip->str);
13834         rval = TRUE;
13835       }
13836     }
13837   }
13838   return rval;
13839 }
13840 
13841 static Boolean CleanUpSeqIdText (SeqIdPtr sip)
13842 {
13843   DbtagPtr     dbt;
13844   ObjectIdPtr  oip;
13845   Boolean      rval = FALSE;
13846 
13847   if (sip == NULL) return FALSE;
13848   if (sip->choice == SEQID_LOCAL) {
13849     oip = (ObjectIdPtr) sip->data.ptrvalue;
13850     if (oip != NULL) {
13851       if (CleanUpObjId (oip)) {
13852         rval = TRUE;
13853       }
13854     }
13855   } else if (sip->choice == SEQID_GENERAL) {
13856     dbt = (DbtagPtr) sip->data.ptrvalue;
13857     if (dbt != NULL) {
13858       oip = dbt->tag;
13859       if (oip != NULL) {
13860         if (CleanUpObjId (oip)) {
13861           rval = TRUE;
13862         }
13863       }
13864     }
13865   }
13866   return rval;
13867 }
13868 */
13869 
13870 
CleanUpSeqIdText(SeqIdPtr sip)13871 static Boolean CleanUpSeqIdText (SeqIdPtr sip)
13872 {
13873   ObjectIdPtr  oip;
13874   Boolean      rval = FALSE;
13875 
13876   if (sip == NULL) return FALSE;
13877   if (sip->choice == SEQID_LOCAL) {
13878     oip = (ObjectIdPtr) sip->data.ptrvalue;
13879     if (oip != NULL) {
13880       if (StringDoesHaveText (oip->str)) {
13881         if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
13882        TrimSpacesAroundString (oip->str);
13883        rval = TRUE;
13884         }
13885       }
13886     }
13887   }
13888   return rval;
13889 }
13890 
/* Seq-id visitor callback: applies CleanUpSeqIdText to the id and discards
 * its result.  userdata is not used.
 */
static void CleanUpSeqId (SeqIdPtr sip, Pointer userdata)

{
  (void) CleanUpSeqIdText (sip);
}
13899 
/* Bioseq visitor callback: applies CleanUpSeqIdText to every Seq-id on the
 * Bioseq and, if any id text changed, calls SeqMgrReplaceInBioseqIndex so
 * the Bioseq is re-registered under its updated ids.
 * A NULL Bioseq is ignored.  userdata is not used.
 */
static void CleanSeqIdInBioseq (BioseqPtr bsp, Pointer userdata)

{
  SeqIdPtr sip;
  Boolean  need_reindex = FALSE;

  if (bsp == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (CleanUpSeqIdText (sip)) {
      need_reindex = TRUE;
    }
  }
  if (need_reindex) {
    SeqMgrReplaceInBioseqIndex (bsp);
  }
}
13915 
/* Feature visitor callback: hands the feature to VisitSeqIdsInSeqFeat with
 * CleanUpSeqId as the per-Seq-id callback.  userdata is not used.
 */
static void CleanSeqIdInSeqFeat (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  VisitSeqIdsInSeqFeat (sfp, NULL, CleanUpSeqId);
}
13921 
/* Alignment visitor callback: hands the alignment to VisitSeqIdsInSeqAlign
 * with CleanUpSeqId as the per-Seq-id callback.  userdata is not used.
 */
static void CleanSeqIdInSeqAlign (
  SeqAlignPtr sap,
  Pointer userdata
)

{
  VisitSeqIdsInSeqAlign (sap, NULL, CleanUpSeqId);
}
13927 
/* Graph visitor callback: hands the graph to VisitSeqIdsInSeqGraph with
 * CleanUpSeqId as the per-Seq-id callback.  userdata is not used.
 */
static void CleanSeqIdInSeqGraph (
  SeqGraphPtr sgp,
  Pointer userdata
)

{
  VisitSeqIdsInSeqGraph (sgp, NULL, CleanUpSeqId);
}
13933 
/* Annotation visitor callback: hands the Seq-annot to VisitSeqIdsInSeqAnnot
 * with CleanUpSeqId as the per-Seq-id callback.  userdata is not used.
 */
static void CleanSeqIdInSeqAnnot (
  SeqAnnotPtr annot,
  Pointer userdata
)

{
  VisitSeqIdsInSeqAnnot (annot, NULL, CleanUpSeqId);
}
13939 
/* Tallies gathered by FixBadSetClass through its visitor callbacks */
typedef struct npcounts {
  Int4     nucs;          /* incremented by CountNucsAndProts for each ISA_na Bioseq */
  Int4     prots;         /* incremented by CountNucsAndProts for each ISA_aa Bioseq */
  Boolean  make_genbank;  /* set by CheckInnerSets when a visited set is neither segset nor parts */
} NPCounts, PNTR NPCountsPtr;
13945 
/* Bioseq visitor callback: bumps the nucleotide or protein counter in the
 * NPCounts passed as userdata, according to the Bioseq's molecule type.
 * Bioseqs that are neither are not counted.
 */
static void CountNucsAndProts (BioseqPtr bsp, Pointer userdata)

{
  NPCountsPtr  tally = (NPCountsPtr) userdata;

  if (bsp == NULL || tally == NULL) return;

  if (ISA_na (bsp->mol)) {
    tally->nucs++;
    return;
  }
  if (ISA_aa (bsp->mol)) {
    tally->prots++;
  }
}
13961 
/* Set visitor callback: flags make_genbank in the NPCounts passed as
 * userdata unless the visited set is a segset or a parts set.
 */
static void CheckInnerSets (BioseqSetPtr bssp, Pointer userdata)

{
  NPCountsPtr  tally = (NPCountsPtr) userdata;

  if (bssp == NULL || tally == NULL) return;

  if (bssp->_class != BioseqseqSet_class_segset &&
      bssp->_class != BioseqseqSet_class_parts) {
    tally->make_genbank = TRUE;
  }
}
13974 
/* Set visitor callback: assigns a class to a set whose class is not-set or
 * other.  The set becomes a nuc-prot set when it holds exactly one
 * nucleotide, at least one protein, and CheckInnerSets raised no flag;
 * otherwise it becomes a genbank set.  userdata is not used.
 */
static void FixBadSetClass (BioseqSetPtr bssp, Pointer userdata)

{
  NPCounts  tally;

  if (bssp == NULL) return;
  if (! (bssp->_class == BioseqseqSet_class_not_set ||
         bssp->_class == BioseqseqSet_class_other)) return;

  MemSet ((Pointer) &tally, 0, sizeof (NPCounts));
  VisitSequencesInSet (bssp, (Pointer) &tally, VISIT_MAINS, CountNucsAndProts);
  VisitSetsInSet (bssp, (Pointer) &tally, CheckInnerSets);

  if (tally.make_genbank || tally.nucs != 1 || tally.prots <= 0) {
    bssp->_class = BioseqseqSet_class_genbank;
  } else {
    bssp->_class = BioseqseqSet_class_nuc_prot;
  }
}
13992 
/* Removes later duplicates from a Bioseq's Seq-id chain.  For each id, every
 * subsequent id for which SeqIdComp returns SIC_YES is unlinked and freed;
 * the first occurrence is kept.
 */
static void RemoveDuplicateSeqIds (BioseqPtr bsp)

{
  SeqIdPtr  anchor, probe, trail, after;

  if (bsp == NULL) return;

  anchor = bsp->id;
  while (anchor != NULL) {
    trail = anchor;
    probe = anchor->next;
    while (probe != NULL) {
      /* remember the successor now; probe may be freed below */
      after = probe->next;
      if (SeqIdComp (anchor, probe) != SIC_YES) {
        trail = probe;
      } else {
        trail->next = after;
        probe->next = NULL;
        SeqIdFree (probe);
      }
      probe = after;
    }
    anchor = anchor->next;
  }
}
14016 
14017 
/*
 * BasicSeqEntryCleanupInternal
 *
 * Recursive cleanup of one Seq-entry.
 *
 * For a Bioseq: removes duplicate Seq-ids and zero-length literals, then
 * repairs bsp->mol from the MolInfo biomol (fills it in when not set, and
 * forces RNA when biomol is cRNA or mRNA).
 * For a Bioseq-set: first recurses into every member entry.
 * In both cases it then cleans every feature and graph in the attached
 * annotations, cleans every descriptor, and finally copies the GenBank
 * block division into any OrgName that has no division of its own.
 *
 * publist, isEmblOrDdbj, isJscan and stripSerial are forwarded to the
 * feature and descriptor cleanup helpers.
 */
static void BasicSeqEntryCleanupInternal (
  SeqEntryPtr sep,
  ValNodePtr PNTR publist,
  Boolean isEmblOrDdbj,
  Boolean isJscan,
  Boolean stripSerial
)

{
  SeqAnnotPtr   annot = NULL;
  BioSourcePtr  biop = NULL;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  ValNodePtr    descr = NULL;
  Char          division [10];
  SeqFeatPtr    feat;
  GBBlockPtr    gbp = NULL;
  SeqGraphPtr   graph;
  MolInfoPtr    mip;
  SeqDescrPtr   molinfo_desc;
  OrgNamePtr    onp;
  OrgRefPtr     orp = NULL;

  if (sep == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;

    /* remove duplicate SeqIds on the same Bioseq */
    RemoveDuplicateSeqIds (bsp);

    /* repair damaged delta sequences */
    RemoveZeroLengthSeqLits (bsp);

    annot = bsp->annot;
    descr = bsp->descr;

    mip = NULL;
    molinfo_desc = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
    if (molinfo_desc != NULL && molinfo_desc->choice == Seq_descr_molinfo) {
      mip = (MolInfoPtr) molinfo_desc->data.ptrvalue;
    }

    if (mip != NULL) {
      if (bsp->mol != 0) {
        /* mol is set but disagrees with an RNA biomol */
        if (bsp->mol != Seq_mol_rna
            && (mip->biomol == MOLECULE_TYPE_CRNA || mip->biomol == MOLECULE_TYPE_MRNA)) {
          bsp->mol = Seq_mol_rna;
        }
      } else {
        /* repair if bsp.mol is not-set */
        switch (mip->biomol) {
          case MOLECULE_TYPE_GENOMIC :
          case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
            bsp->mol = Seq_mol_na;
            break;
          case MOLECULE_TYPE_PRE_MRNA :
          case MOLECULE_TYPE_MRNA :
          case MOLECULE_TYPE_RRNA :
          case MOLECULE_TYPE_TRNA :
          case MOLECULE_TYPE_SNRNA :
          case MOLECULE_TYPE_SCRNA :
          case MOLECULE_TYPE_CRNA :
          case MOLECULE_TYPE_SNORNA :
          case MOLECULE_TYPE_TRANSCRIBED_RNA :
          case MOLECULE_TYPE_NCRNA :
          case MOLECULE_TYPE_TMRNA :
            bsp->mol = Seq_mol_rna;
            break;
          case MOLECULE_TYPE_PEPTIDE :
            bsp->mol = Seq_mol_aa;
            break;
          case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
            bsp->mol = Seq_mol_other;
            break;
          default :
            break;
        }
      }
    }
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;

    /* members are cleaned before the set's own annotations and descriptors */
    for (child = bssp->seq_set; child != NULL; child = child->next) {
      BasicSeqEntryCleanupInternal (child, publist, isEmblOrDdbj, isJscan, stripSerial);
    }

    annot = bssp->annot;
    descr = bssp->descr;
  } else {
    return;
  }

  division [0] = '\0';

  /* features (annot type 1) and graphs (annot type 3) */
  for (; annot != NULL; annot = annot->next) {
    if (annot->type == 1) {
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
        CleanUpSeqFeat (feat, isEmblOrDdbj, isJscan, stripSerial, TRUE, publist);
      }
    } else if (annot->type == 3) {
      for (graph = (SeqGraphPtr) annot->data; graph != NULL; graph = graph->next) {
        CleanUpSeqGraph (graph);
      }
    }
  }

  /* descriptors: remember the org-ref and genbank block, clean each one */
  for (; descr != NULL; descr = descr->next) {
    if (descr->choice == Seq_descr_org) {
      orp = (OrgRefPtr) descr->data.ptrvalue;
    } else if (descr->choice == Seq_descr_genbank) {
      gbp = (GBBlockPtr) descr->data.ptrvalue;
    } else if (descr->choice == Seq_descr_source) {
      biop = (BioSourcePtr) descr->data.ptrvalue;
      if (biop != NULL) {
        orp = biop->org;
      }
    }
    CleanupDescriptorStrings (descr, stripSerial, TRUE, publist, isEmblOrDdbj);
  }

  /* copy genbank block division into biosource, if necessary */

  if (orp == NULL || gbp == NULL) return;

  StringNCpy_0 (division, gbp->div, sizeof (division));
  if (StringHasNoText (division)) return;

  for (onp = orp->orgname; onp != NULL; onp = onp->next) {
    if (! StringHasNoText (onp->div)) continue;
    onp->div = MemFree (onp->div);
    onp->div = StringSaveNoNull (division);
  }
}
14160 
/*
  ReplaceCitOnFeat looks through publist for the first node with choice 1
  whose string equals cgp->cit.  If the node immediately after it has
  choice 2, cgp->cit is freed and replaced by a copy of that node's string
  (presumably an old-label / new-label pair built by the caller - confirm
  against the code that fills publist).  A replacement that starts with
  "unpublished" in any letter case gets its first letter forced to 'U'.
  Only the first matching node is ever considered.

  cgp must not be NULL; callers in this file check that before calling.
*/

static void ReplaceCitOnFeat (CitGenPtr cgp, ValNodePtr publist)

{
  ValNodePtr  match;
  ValNodePtr  repl;

  /* find the first choice-1 node carrying the current citation string */
  match = publist;
  while (match != NULL) {
    if (match->choice == 1 &&
        StringCmp (cgp->cit, (CharPtr) match->data.ptrvalue) == 0) break;
    match = match->next;
  }
  if (match == NULL) return;

  /* the replacement, if any, is the node right behind the match */
  repl = match->next;
  if (repl == NULL || repl->choice != 2) return;

  cgp->cit = MemFree (cgp->cit);
  cgp->cit = StringSaveNoNull ((CharPtr) repl->data.ptrvalue);
  if (cgp->cit == NULL) return;

  if (StringNICmp (cgp->cit, "unpublished", 11) == 0) {
    cgp->cit [0] = 'U';
  }
}
14184 
/*
  ChangeCitsOnFeats is a feature callback.  For every entry in the
  feature's citation set it picks a PUB_Gen node - the entry itself, or the
  first PUB_Gen inside a PUB_Equiv - and, when that Cit-gen has a non-blank
  cit string, hands it to ReplaceCitOnFeat together with userdata (cast to
  the ValNode list that function searches).
*/

static void ChangeCitsOnFeats (SeqFeatPtr sfp, Pointer userdata)

{
  CitGenPtr   gen;
  ValNodePtr  inner;
  ValNodePtr  pub;
  ValNodePtr  pubset;
  ValNodePtr  target;

  pubset = sfp->cit;
  if (pubset == NULL || pubset->data.ptrvalue == NULL) return;

  pub = (ValNodePtr) pubset->data.ptrvalue;
  while (pub != NULL) {
    target = NULL;
    if (pub->choice == PUB_Gen) {
      target = pub;
    } else if (pub->choice == PUB_Equiv) {
      /* take the first Cit-gen found inside the equiv, if there is one */
      inner = (ValNodePtr) pub->data.ptrvalue;
      while (inner != NULL && target == NULL) {
        if (inner->choice == PUB_Gen) {
          target = inner;
        }
        inner = inner->next;
      }
    }

    if (target != NULL) {
      gen = (CitGenPtr) target->data.ptrvalue;
      if (gen != NULL && (! StringHasNoText (gen->cit))) {
        ReplaceCitOnFeat (gen, (ValNodePtr) userdata);
      }
    }

    pub = pub->next;
  }
}
14213 
/*
  GetPmidForMuid treats pairlist as consecutive (key, value) integer nodes,
  in the layout GetMuidPmidPairs appends (muid first, then pmid).  It
  returns the value of the first pair whose key equals muid, or 0 when no
  key matches or the list ends in the middle of a pair.
*/

static Int4 GetPmidForMuid (ValNodePtr pairlist, Int4 muid)

{
  ValNodePtr  key;
  ValNodePtr  val;

  /* step two nodes at a time: key node, then its value node */
  for (key = pairlist; key != NULL; key = val->next) {
    val = key->next;
    if (val == NULL) break;
    if (key->data.intvalue == muid) return val->data.intvalue;
  }

  return 0;
}
14234 
/*
  ChangeFeatCitsToPmid is a feature callback.  For every entry in the
  feature's citation set it picks a PUB_Muid node - the entry itself, or
  the first PUB_Muid inside a PUB_Equiv - and looks its value up with
  GetPmidForMuid in the pair list passed as userdata.  When a non-zero
  result comes back the node is converted in place to PUB_PMid holding
  that value.
*/

static void ChangeFeatCitsToPmid (SeqFeatPtr sfp, Pointer userdata)

{
  ValNodePtr  inner;
  Int4        newid;
  Int4        oldid;
  ValNodePtr  pub;
  ValNodePtr  pubset;
  ValNodePtr  target;

  pubset = sfp->cit;
  if (pubset == NULL || pubset->data.ptrvalue == NULL) return;

  for (pub = (ValNodePtr) pubset->data.ptrvalue; pub != NULL; pub = pub->next) {
    target = NULL;
    if (pub->choice == PUB_Muid) {
      target = pub;
    } else if (pub->choice == PUB_Equiv) {
      inner = (ValNodePtr) pub->data.ptrvalue;
      while (inner != NULL && inner->choice != PUB_Muid) {
        inner = inner->next;
      }
      target = inner;
    }
    if (target == NULL) continue;

    oldid = target->data.intvalue;
    if (oldid == 0) continue;

    newid = GetPmidForMuid ((ValNodePtr) userdata, oldid);
    if (newid == 0) continue;

    target->choice = PUB_PMid;
    target->data.intvalue = newid;
  }
}
14268 
/*
  GetMuidPmidPairs is a Pubdesc callback.  It scans the pub chain and
  remembers the last PUB_Muid value and the last PUB_PMid value seen.
  When both are non-zero it appends two integer nodes - muid, then pmid -
  to the ValNode list whose head pointer is passed in userdata.
*/

static void GetMuidPmidPairs (PubdescPtr pdp, Pointer userdata)

{
  Int4             medline_uid = 0;
  ValNodePtr PNTR  pairs;
  ValNodePtr       pub;
  Int4             pubmed_id = 0;

  pub = pdp->pub;
  while (pub != NULL) {
    if (pub->choice == PUB_Muid) {
      medline_uid = pub->data.intvalue;
    } else if (pub->choice == PUB_PMid) {
      pubmed_id = pub->data.intvalue;
    }
    pub = pub->next;
  }

  /* only a complete pair is recorded */
  if (medline_uid != 0 && pubmed_id != 0) {
    pairs = (ValNodePtr PNTR) userdata;
    ValNodeAddInt (pairs, 0, medline_uid);
    ValNodeAddInt (pairs, 0, pubmed_id);
  }
}
14292 
/*
  FlattenPubSet walks the list at *prev and collapses every PUB_Equiv that
  wraps exactly one child: the outer node takes over the child's choice
  and data (the integer for PUB_Muid / PUB_PMid, the pointer otherwise)
  and the child node itself is released with ValNodeFree.  Equivs that are
  empty or hold more than one child are left untouched.
*/

static void FlattenPubSet (ValNodePtr PNTR prev)

{
  ValNodePtr  only;
  ValNodePtr  pub;

  if (prev == NULL) return;

  for (pub = *prev; pub != NULL; pub = pub->next) {
    if (pub->choice != PUB_Equiv) continue;

    only = (ValNodePtr) pub->data.ptrvalue;
    if (only == NULL || only->next != NULL) continue;

    /* hoist the single child into the wrapper node */
    pub->choice = only->choice;
    if (only->choice == PUB_Muid || only->choice == PUB_PMid) {
      pub->data.intvalue = only->data.intvalue;
    } else {
      pub->data.ptrvalue = only->data.ptrvalue;
    }
    ValNodeFree (only);
  }
}
14325 
/*
  FlattenDupInPubSet walks the list at *prev and, for every PUB_Equiv that
  holds exactly two children of the same choice, drops the second child
  when both are PUB_Muid or both are PUB_PMid and carry the same integer.
  The equiv is left with a single child; nothing else is changed.
*/

static void FlattenDupInPubSet (ValNodePtr PNTR prev)

{
  ValNodePtr  first;
  ValNodePtr  pub;
  ValNodePtr  second;

  if (prev == NULL) return;

  pub = *prev;
  while (pub != NULL) {
    first = NULL;
    if (pub->choice == PUB_Equiv) {
      first = (ValNodePtr) pub->data.ptrvalue;
    }
    second = (first != NULL) ? first->next : NULL;

    if (second != NULL && second->next == NULL &&
        first->choice == second->choice &&
        (first->choice == PUB_Muid || first->choice == PUB_PMid) &&
        first->data.intvalue == second->data.intvalue) {
      /* identical uid listed twice - keep only the first node */
      first->next = ValNodeFree (second);
    }

    pub = pub->next;
  }
}
14361 
/*
  FlattenPubdesc is a Pubdesc callback that runs FlattenPubSet over the
  descriptor's pub chain.  userdata is not used.
*/

static void FlattenPubdesc (PubdescPtr pdp, Pointer userdata)

{
  ValNodePtr PNTR  pubs;

  pubs = &(pdp->pub);
  FlattenPubSet (pubs);
}
14367 
/*
  FlattenSfpCit is a feature callback.  When the feature has a citation
  set, its member list is first passed through FlattenDupInPubSet and then
  through FlattenPubSet, in that order.  userdata is not used.
*/

static void FlattenSfpCit (SeqFeatPtr sfp, Pointer userdata)

{
  ValNodePtr PNTR  members;
  ValNodePtr       pubset;

  pubset = sfp->cit;
  if (pubset != NULL) {
    members = (ValNodePtr PNTR) &(pubset->data.ptrvalue);
    FlattenDupInPubSet (members);
    FlattenPubSet (members);
  }
}
14378 
14379 typedef struct fastnode {
14380   ValNodePtr  head;
14381   ValNodePtr  tail;
14382 } FastNode, PNTR FastNodePtr;
14383 
/*
  GetCitGenLabels is a Pubdesc callback that collects one label string per
  PUB_Gen entry of the descriptor.

  userdata points to a FastNode; each label produced by PubLabelUnique is
  appended as a string node through the tail pointer, and head is set on
  the first insertion.  Cit-gens with no cit, no journal and no date but a
  non-zero serial_number are skipped.

  If a node cannot be appended the list is left exactly as it was, so the
  head/tail pair always describes one connected list.
*/

static void GetCitGenLabels (PubdescPtr pdp, Pointer userdata)

{
  Char             buf [121];
  CitGenPtr        cgp;
  FastNodePtr      labellist;
  ValNodePtr       tmp;
  ValNodePtr       vnp;

  if (pdp == NULL) return;
  labellist = (FastNodePtr) userdata;
  if (labellist == NULL) return;

  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Gen) continue;
    cgp = (CitGenPtr) vnp->data.ptrvalue;
    if (cgp == NULL) continue;
    if (cgp->cit == NULL && cgp->journal == NULL &&
        cgp->date == NULL && cgp->serial_number) continue;

    /* start from an empty string in case no label text gets written */
    buf [0] = '\0';
    PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE);

    tmp = ValNodeCopyStr (&(labellist->tail), 0, buf);
    /* do not overwrite tail with NULL: later labels would then start a
       new chain that can no longer be reached from head */
    if (tmp == NULL) continue;

    if (labellist->head == NULL) {
      labellist->head = tmp;
    }
    labellist->tail = tmp;
  }
}
14411 
/*
  ReplaceShortCitGenOnFeat replaces an abbreviated citation label on a
  feature with the full label it is a prefix of.

  cgp->cit is only considered when it is 2 to 120 characters long and,
  after any blanks directly following "Unpublished" are removed, ends in
  '>'.  The text in front of that '>' is compared with the start of each
  non-blank string in labellist; the first strictly longer label that
  begins with it replaces cgp->cit (with a leading "unpublished" forced to
  a capital 'U').  Nothing is changed when no label qualifies.

  The normalized form of cgp->cit does not depend on the label being
  examined, so it is built once up front rather than once per label.
*/

static void ReplaceShortCitGenOnFeat (CitGenPtr cgp, ValNodePtr labellist)

{
  Char        buf [128];
  size_t      len1;
  size_t      len2;
  CharPtr     ptr;
  CharPtr     str;
  CharPtr     tmp;
  ValNodePtr  vnp;

  if (cgp == NULL || labellist == NULL) return;

  /* the 120 character cap guarantees the copy below fits in buf */
  len1 = StringLen (cgp->cit);
  if (len1 < 2 || len1 > 120) return;
  StringCpy (buf, cgp->cit);

  /* squeeze out the blanks that directly follow "Unpublished" */
  ptr = StringStr (buf, "Unpublished");
  if (ptr != NULL) {
    ptr += 11;
    *ptr = '\0';
    tmp = StringStr (cgp->cit, "Unpublished");
    if (tmp != NULL) {
      tmp += 11;
      while (*tmp == ' ') {
        tmp++;
      }
      StringCat (buf, tmp);
    }
  }

  /* only labels ending in '>' are treated as abbreviations; the '>'
     itself is not part of the prefix that gets compared */
  len1 = StringLen (buf);
  if (buf [len1 - 1] != '>') return;
  len1--;

  for (vnp = labellist; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    len2 = StringLen (str);
    if (len1 >= len2) continue;
    if (StringNCmp (str, buf, len1) != 0) continue;

    cgp->cit = MemFree (cgp->cit);
    cgp->cit = StringSaveNoNull (str);
    if (cgp->cit != NULL) {
      if (StringNICmp (cgp->cit, "unpublished", 11) == 0) {
        cgp->cit [0] = 'U';
      }
    }
    return;
  }
}
14462 
/*
  UpdateShortFeatCits is a feature callback.  For every entry in the
  feature's citation set it picks a PUB_Gen node - the entry itself, or
  the first PUB_Gen inside a PUB_Equiv - and, when that Cit-gen has a
  non-blank cit string, passes it to ReplaceShortCitGenOnFeat along with
  userdata cast to the label list.
*/

static void UpdateShortFeatCits (SeqFeatPtr sfp, Pointer userdata)

{
  CitGenPtr   gen;
  ValNodePtr  inner;
  ValNodePtr  pub;
  ValNodePtr  pubset;
  ValNodePtr  target;

  pubset = sfp->cit;
  if (pubset == NULL || pubset->data.ptrvalue == NULL) return;

  for (pub = (ValNodePtr) pubset->data.ptrvalue; pub != NULL; pub = pub->next) {
    target = NULL;
    if (pub->choice == PUB_Gen) {
      target = pub;
    } else if (pub->choice == PUB_Equiv) {
      inner = (ValNodePtr) pub->data.ptrvalue;
      while (inner != NULL && inner->choice != PUB_Gen) {
        inner = inner->next;
      }
      target = inner;
    }
    if (target == NULL) continue;

    gen = (CitGenPtr) target->data.ptrvalue;
    if (gen == NULL || StringHasNoText (gen->cit)) continue;

    ReplaceShortCitGenOnFeat (gen, (ValNodePtr) userdata);
  }
}
14491 
14492 //LCOV_EXCL_START
BasicSeqAnnotCleanup(SeqAnnotPtr sap)14493 NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap)
14494 
14495 {
14496   SeqFeatPtr   sfp;
14497   SeqGraphPtr  sgp;
14498 
14499   if (sap == NULL) return;
14500 
14501   VisitSeqIdsInSeqAnnot (sap, NULL, CleanUpSeqId);
14502 
14503   if (sap->type == 1) {
14504     sfp = (SeqFeatPtr) sap->data;
14505     while (sfp != NULL) {
14506       CleanUpSeqFeat (sfp, FALSE, FALSE, TRUE, TRUE, NULL);
14507       sfp = sfp->next;
14508     }
14509   } else if (sap->type == 3) {
14510     sgp = (SeqGraphPtr) sap->data;
14511     while (sgp != NULL) {
14512       CleanUpSeqGraph (sgp);
14513       sgp = sgp->next;
14514     }
14515   }
14516 }
14517 //LCOV_EXCL_STOP
14518 
14519 /*
14520 static CharPtr proteinOrganellePrefix [] = {
14521   NULL,
14522   NULL,
14523   "chloroplast",
14524   "chromoplast",
14525   "kinetoplast",
14526   "mitochondrion",
14527   "plastid",
14528   "macronuclear",
14529   "extrachromosomal",
14530   "plasmid",
14531   NULL,
14532   NULL,
14533   "cyanelle",
14534   "proviral",
14535   "virus",
14536   "nucleomorph",
14537   "apicoplast",
14538   "leucoplast",
14539   "protoplast",
14540   "endogenous virus",
14541   "hydrogenosome",
14542   "chromosome",
14543   "chromatophore"
14544 };
14545 */
14546 
14547 static CharPtr proteinOrganellePrefix [] = {
14548   NULL,
14549   NULL,
14550   "chloroplast",
14551   "chromoplast",
14552   "kinetoplast",
14553   "mitochondrion",
14554   "plastid",
14555   "macronuclear",
14556   NULL,
14557   "plasmid",
14558   NULL,
14559   NULL,
14560   "cyanelle",
14561   NULL,
14562   NULL,
14563   "nucleomorph",
14564   "apicoplast",
14565   "leucoplast",
14566   "protoplast",
14567   "endogenous virus",
14568   "hydrogenosome",
14569   NULL,
14570   "chromatophore"
14571 };
14572 
/*
  TitleEndsInOrganism tests whether title ends in " [organism]", comparing
  the bracketed text without regard to letter case.

  Returns a pointer to the blank in front of the '[' inside title, or NULL
  when title or organism is blank, title is too short, or the ending does
  not match.

  case_diffp, when not NULL, is cleared on entry and set to TRUE if the
  bracketed text matches organism only when letter case is ignored.

  onlp, when not NULL, is cleared on entry.  On a successful match the
  text in front of the bracket is checked for a trailing " (prefix)" using
  the entries of proteinOrganellePrefix; for the first entry that matches
  (ignoring case), *onlp is set to the blank in front of its '('.

  The organelle argument is not consulted.
*/

static CharPtr TitleEndsInOrganism (
  CharPtr title,
  CharPtr organism,
  CharPtr organelle,
  CharPtr PNTR onlp,
  BoolPtr case_diffp
)

{
  CharPtr  bracket;
  size_t   head_len;
  int      idx;
  size_t   org_len;
  CharPtr  paren;
  CharPtr  prefix;
  size_t   prefix_len;
  size_t   title_len;

  if (onlp != NULL) {
    *onlp = NULL;
  }
  if (case_diffp != NULL) {
    *case_diffp = FALSE;
  }

  if (StringHasNoText (title) || StringHasNoText (organism)) return NULL;

  title_len = StringLen (title);
  org_len = StringLen (organism);
  if (org_len + 4 > title_len) return NULL;

  /* head_len is the offset of the blank that precedes "[organism]" */
  head_len = title_len - org_len - 3;
  bracket = title + head_len;
  if (bracket [0] != ' ') return NULL;
  if (bracket [1] != '[') return NULL;
  if (bracket [org_len + 2] != ']') return NULL;
  if (StringNICmp (bracket + 2, organism, org_len) != 0) return NULL;

  if (case_diffp != NULL && StringNCmp (bracket + 2, organism, org_len) != 0) {
    *case_diffp = TRUE;
  }

  if (onlp == NULL) return bracket;

  /* look for " (organelle)" sitting directly in front of the bracket */
  for (idx = GENOME_chloroplast; idx <= GENOME_chromatophore; idx++) {
    prefix = proteinOrganellePrefix [idx];
    if (prefix == NULL) continue;
    prefix_len = StringLen (prefix);
    if (prefix_len + 4 >= head_len) continue;
    paren = title + head_len - prefix_len - 3;
    if (paren [0] == ' ' && paren [1] == '(' && paren [prefix_len + 2] == ')' &&
        StringNICmp (paren + 2, prefix, prefix_len) == 0) {
      *onlp = paren;
      break;
    }
  }

  return bracket;
}
14621 
/*
  RemoveOrgFromEndOfProtein is a feature callback for protein features.
  userdata is the organism name (taxname) as a string.

  For each protein name that is at least 5 characters long and ends in
  "[taxname]" (letter case ignored), the bracketed part is cut off and the
  remaining name is trimmed of surrounding spaces.  Names whose final
  bracket starts with "[NAD" are never touched.

  The whole bracketed text must equal taxname.  Earlier code compared one
  character fewer than the length of taxname, so the final character was
  never checked and a one-character taxname matched any bracketed letter.
*/

static void RemoveOrgFromEndOfProtein (SeqFeatPtr sfp, Pointer userdata)

{
  CharPtr     cp;
  size_t      len;
  ProtRefPtr  prp;
  CharPtr     str;
  size_t      taxlen;
  CharPtr     taxname;
  ValNodePtr  vnp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (prp == NULL) return;

  taxname = (CharPtr) userdata;
  if (StringHasNoText (taxname)) return;
  taxlen = StringLen (taxname);

  for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    len = StringLen (str);
    if (len < 5) continue;
    if (str [len - 1] != ']') continue;
    cp = StringRChr (str, '[');
    if (cp == NULL) continue;
    if (StringNCmp (cp, "[NAD", 4) == 0) continue;
    /* cp runs to the closing ']', so it must be taxname plus two brackets */
    if (StringLen (cp) != taxlen + 2) continue;
    if (StringNICmp (cp + 1, taxname, taxlen) != 0) continue;
    *cp = '\0';
    TrimSpacesAroundString (str);
  }
}
14655 
/*
  AddPartialToProteinTitle is a Bioseq callback that brings the tail of an
  existing protein title in line with the record's current data.  The
  rebuilt title has the form

      <main text>[, partial][ (organelle)] [taxname]

  or, for WP_ accessions whose source lists two different superkingdoms,

      <main text>[, partial][ (organelle)] [first][second]

  Only amino acid Bioseqs are processed, and Swiss-Prot records are left
  alone.  Before the title is examined, RemoveOrgFromEndOfProtein is run
  over the features on the Bioseq.  The title is rewritten only if it
  currently ends in a bracketed old name, taxname, or "genus species";
  otherwise it is left exactly as it was.

  userdata is not used.
*/

static void AddPartialToProteinTitle (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CharPtr             binomial = NULL;
  BioSourcePtr        biop;
  BinomialOrgNamePtr  bonp;
  Boolean             case_difference = FALSE;
  CharPtr             first_super_kingdom = NULL;
  int                 genome = 0;
  CharPtr             genus = NULL;
  Boolean             is_cross_kingdom = FALSE;
  Boolean             is_wp = FALSE;
  size_t              len;
  MolInfoPtr          mip;
  Int2                num_super_kingdom = 0;
  CharPtr             oldname = NULL;
  OrgModPtr           omp;
  OrgNamePtr          onp;
  CharPtr             organelle = NULL;
  OrgRefPtr           orp;
  Boolean             partial = FALSE;
  CharPtr             penult = NULL;
  CharPtr             ptr;
  SeqDescrPtr         sdp;
  CharPtr             second_super_kingdom = NULL;
  SeqIdPtr            sip;
  CharPtr             species = NULL;
  CharPtr             str;
  CharPtr             suffix = NULL;
  Boolean             super_kingdoms_different = FALSE;
  CharPtr             taxname = NULL;
  TaxElementPtr       tep;
  CharPtr             title;
  CharPtr             tmp;
  TextSeqIdPtr        tsip;
  SeqDescrPtr         ttl = NULL;

  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  /* skip Swiss-Prot records, and note accessions that start with WP_ */
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_SWISSPROT) return;
    if (sip->choice == SEQID_OTHER) {
      tsip = (TextSeqIdPtr) sip->data.ptrvalue;
      if (tsip != NULL && tsip->accession != NULL) {
        if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
          is_wp = TRUE;
        }
      }
    }
  }

  /* completeness values 2 through 5 mark the protein as partial */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
  if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
    mip = (MolInfoPtr) sdp->data.ptrvalue;
    if (mip != NULL && mip->completeness > 1 && mip->completeness < 6) {
      partial = TRUE;
    }
  }

  /* collect organelle, taxname, genus/species and old name from the source */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
  if (sdp != NULL && sdp->choice == Seq_descr_source) {
    biop = (BioSourcePtr) sdp->data.ptrvalue;
    if (biop != NULL) {
      genome = biop->genome;
      if (genome >= GENOME_chloroplast && genome <= GENOME_chromatophore) {
        organelle = proteinOrganellePrefix [genome];
      }
      orp = biop->org;
      if (orp != NULL) {
        taxname = orp->taxname;
        /*
        if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0) {
          organelle = NULL;
        }
        */
        onp = orp->orgname;
        if (onp != NULL) {
          if (onp->choice == 1) {
            bonp = (BinomialOrgNamePtr) onp->data;
            if (bonp != NULL) {
              genus = bonp->genus;
              species = bonp->species;
            }
          }
          for (omp = onp->mod; omp != NULL; omp = omp->next) {
            if (omp->subtype == ORGMOD_old_name) {
              oldname = omp->subname;
            }
          }
        }
      }
    }
  }

  VisitFeaturesOnBsp (bsp, (Pointer) taxname, RemoveOrgFromEndOfProtein);

  ttl = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
  if (ttl == NULL || ttl->choice != Seq_descr_title) return;
  str = (CharPtr) ttl->data.ptrvalue;
  if (StringHasNoText (str)) return;

  /* for WP_ records, look for two different superkingdom names among the
     taxonomy elements of the source descriptors */
  if (is_wp) {
    for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
         sdp != NULL;
         sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, sdp)) {
      if (sdp->choice != Seq_descr_source) continue;
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop == NULL) continue;
      orp = biop->org;
      if (orp == NULL) continue;
      onp = orp->orgname;
      if (onp == NULL) continue;
      if (onp->choice != 5) continue;
      for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
        if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
          num_super_kingdom++;
          if (first_super_kingdom == NULL) {
            first_super_kingdom = tep->name;
          } else if (StringICmp (first_super_kingdom, tep->name) != 0) {
            second_super_kingdom = tep->name;
            super_kingdoms_different = TRUE;
          }
          if (num_super_kingdom > 1 && super_kingdoms_different) {
            is_cross_kingdom = TRUE;
          }
        }
      }
    }
  }

  /* search for partial, must be just before parenthesized organelle or bracketed organism */
  tmp = StringSearch (str, ", partial [");
  if (tmp == NULL) {
    tmp = StringSearch (str, ", partial (");
  }

  /* find oldname or taxname in brackets at end of protein title */
  if (oldname != NULL && taxname != NULL) {
    suffix = TitleEndsInOrganism (str, oldname, organelle, &penult, &case_difference);
  }
  if (suffix == NULL && taxname != NULL) {
    suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
    if (suffix == NULL && StringDoesHaveText (genus) && StringDoesHaveText (species)) {
      len = StringLen (genus) + StringLen (species) + 5;
      binomial = (CharPtr) MemNew (len);
      if (binomial != NULL) {
        StringCpy (binomial, genus);
        StringCat (binomial, " ");
        StringCat (binomial, species);
        suffix = TitleEndsInOrganism (str, binomial, organelle, &penult, &case_difference);
      }
    }
    if (suffix == NULL && is_cross_kingdom) {
      /* retry with everything after the first of two adjacent brackets cut off */
      ptr = StringStr (str, "][");
      if (ptr != NULL) {
        *(ptr + 1) = '\0';
        suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
        if (suffix == NULL) {
          /* still no match, so the title will not be rebuilt - put the
             bracket back rather than leave the stored title clipped */
          *(ptr + 1) = '[';
        }
      }
    } else {
      /*
        NOTE(review): penult points at the blank in front of "(organelle)"
        inside the title, while organelle is the bare table entry, so the
        StringCmp below cannot report equality when both are set - confirm
        whether a comparison of the parenthesized text was intended.
      */
      if (organelle == NULL && penult != NULL) {
      } else if (organelle != NULL && penult == NULL) {
      } else if (StringCmp (organelle, penult) != 0) {
      } else if (binomial != NULL) {
      } else if (case_difference) {
      } else {
        /* bail if no need to change partial text (organelle) [organism name] */
        /* binomial is NULL on this path, so nothing is leaked by returning */
        if (partial) {
          if (tmp != NULL) return;
        } else {
          if (tmp == NULL) return;
        }
      }
    }
  }

  /* suffix points into the title, not into binomial, so this is safe */
  binomial = MemFree (binomial);

  /* do not change unless [genus species] was at the end */
  if (suffix == NULL) return;

  /* truncate bracketed info from end of title, will replace with current taxname */
  *suffix = '\0';
  suffix = taxname;

  /* truncate parenthesized info from just before bracketed taxname, will replace with current organelle */
  if (penult != NULL) {
    *penult = '\0';
  }

  /* if ", partial [/(" was indeed just before the [genus species] or (organelle), it will now be ", partial" */
  if (! partial && tmp != NULL && StringCmp (tmp, ", partial") == 0) {
    *tmp = '\0';
  }
  TrimSpacesAroundString (str);

  /* the extra 20 covers ", partial", the brackets, parentheses, blanks and terminator */
  len = StringLen (str) + StringLen (organelle) + StringLen (suffix) + StringLen (first_super_kingdom) + StringLen (second_super_kingdom) + 20;
  title = MemNew (sizeof (Char) * len);
  if (title == NULL) return;

  StringCpy (title, str);
  if (partial && tmp == NULL) {
    StringCat (title, ", partial");
  }
  if (organelle != NULL) {
    StringCat (title, " (");
    StringCat (title, organelle);
    StringCat (title, ")");
  }
  if (is_cross_kingdom && StringDoesHaveText (first_super_kingdom) && StringDoesHaveText (second_super_kingdom)) {
    StringCat (title, " [");
    StringCat (title, first_super_kingdom);
    StringCat (title, "][");
    StringCat (title, second_super_kingdom);
    StringCat (title, "]");
  } else if (suffix != NULL) {
    StringCat (title, " [");
    StringCat (title, suffix);
    StringCat (title, "]");
  }

  /* the descriptor takes ownership of the rebuilt title */
  MemFree (str);
  ttl->data.ptrvalue = title;
}
14882 
14883 //LCOV_EXCL_START
CleanUpProteinTitles(SeqEntryPtr sep)14884 NLM_EXTERN void CleanUpProteinTitles (SeqEntryPtr sep)
14885 
14886 {
14887   if (sep == NULL) return;
14888   VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
14889 }
14890 //LCOV_EXCL_STOP
14891 
/*
  BasicSeqEntryCleanupEx drives the basic cleanup of a Seq-entry.  The
  passes run in a fixed order:

    - assign IDs in the entity and scope lookups to this entry
    - clean Seq-ids on Bioseqs, features, alignments, graphs and annots
    - fix Bioseq-set classes and flatten nested Pub-equivs
    - run BasicSeqEntryCleanupInternal, then update feature citations
      from the list it returns
    - convert feature citation muids to pmids
    - expand abbreviated Cit-gen labels on features
    - restore the previous scope
    - normalize authors and contact affiliation on a Seq-submit block
    - when resync is TRUE, resynchronize CDS, mRNA and protein partials
    - refresh protein titles

  A NULL entry is ignored.
*/

static void BasicSeqEntryCleanupEx (SeqEntryPtr sep, Boolean resync)

{
  ContactInfoPtr  contact;
  AuthorPtr       contact_author;
  CitSubPtr       csp;
  Uint2           entityID;
  Boolean         isEmblOrDdbj = FALSE;
  Boolean         isJscan = FALSE;
  FastNode        labels;
  ValNodePtr      muid_pmid = NULL;
  ObjMgrDataPtr   omdp;
  SeqEntryPtr     prev_scope;
  ValNodePtr      publist = NULL;
  Boolean         stripSerial = TRUE;
  SubmitBlockPtr  subblock = NULL;
  SeqSubmitPtr    submit;

  if (sep == NULL) return;

  /* InGpsGenomic needs idx fields assigned */

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);
  AssignIDsInEntityEx (entityID, 0, NULL, NULL);

  /* HandleXrefOnCDS call to GetBestProteinFeatureUnindexed now scoped within record */

  prev_scope = SeqEntrySetScope (sep);

  /* clean up spaces in local IDs */

  VisitBioseqsInSep (sep, NULL, CleanSeqIdInBioseq);
  VisitFeaturesInSep (sep, NULL, CleanSeqIdInSeqFeat);
  VisitAlignmentsInSep (sep, NULL, CleanSeqIdInSeqAlign);
  VisitGraphsInSep (sep, NULL, CleanSeqIdInSeqGraph);
  VisitAnnotsInSep (sep, NULL, CleanSeqIdInSeqAnnot);

  /* Fix Bioseq-sets with class 0 */

  VisitSetsInSep (sep, NULL, FixBadSetClass);

  /* removed unnecessarily nested Pub-equivs */

  VisitPubdescsInSep (sep, NULL, FlattenPubdesc);
  VisitFeaturesInSep (sep, NULL, FlattenSfpCit);

  /* record-wide flags gathered from the IDs in the entry */

  SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
  SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
  SeqEntryExplore (sep, (Pointer) &isJscan, CheckForJournalScanID);
#ifdef SUPPRESS_STRIP_SERIAL_DIFFERENCES
  stripSerial = FALSE;
#endif

  /* main cleanup; publist comes back holding citation replacements */

  BasicSeqEntryCleanupInternal (sep, &publist, isEmblOrDdbj, isJscan, stripSerial);
  if (publist != NULL) {
    VisitFeaturesInSep (sep, (Pointer) publist, ChangeCitsOnFeats);
  }
  ValNodeFreeData (publist);

  /* now get muid/pmid pairs, update sfp->cits to pmids */

  VisitPubdescsInSep (sep, (Pointer) &muid_pmid, GetMuidPmidPairs);
  if (muid_pmid != NULL) {
    VisitFeaturesInSep (sep, (Pointer) muid_pmid, ChangeFeatCitsToPmid);
  }
  ValNodeFree (muid_pmid);

  /* collect full Cit-gen labels, expand abbreviated ones on features */

  labels.head = NULL;
  labels.tail = NULL;
  VisitPubdescsInSep (sep, (Pointer) &labels, GetCitGenLabels);
  if (labels.head != NULL) {
    VisitFeaturesInSep (sep, (Pointer) labels.head, UpdateShortFeatCits);
  }
  ValNodeFreeData (labels.head);

  SeqEntrySetScope (prev_scope);

  /* also normalize authors on submit block citation */

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);
  omdp = ObjMgrGetData (entityID);
  if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
    submit = (SeqSubmitPtr) omdp->dataptr;
    if (submit != NULL && submit->datatype == 1) {
      subblock = submit->sub;
    }
  }
  if (subblock != NULL) {
    csp = subblock->cit;
    if (csp != NULL) {
      NormalizeAuthors (csp->authors, TRUE);
    }
    contact = subblock->contact;
    contact_author = (contact != NULL) ? contact->contact : NULL;
    if (contact_author != NULL) {
      contact_author->affil = CleanAffil (contact_author->affil);
    }
  }

  if (resync) {
    ResynchCodingRegionPartials (sep);
    ResynchMessengerRNAPartials (sep);
    ResynchProteinPartials (sep);
  }

  /*
  dynamically add missing partial to already instantiated protein
  titles, in between main title and bracketed organism name
  */

  VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
}
15006 
BasicSeqEntryCleanup(SeqEntryPtr sep)15007 NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
15008 
15009 {
15010   BasicSeqEntryCleanupEx (sep, FALSE);
15011 }
15012 
15013 //LCOV_EXCL_START
AdvancedSeqEntryCleanup(SeqEntryPtr sep)15014 NLM_EXTERN void AdvancedSeqEntryCleanup (SeqEntryPtr sep)
15015 
15016 {
15017   BasicSeqEntryCleanupEx (sep, TRUE);
15018 }
15019 //LCOV_EXCL_STOP
15020 
15021 typedef struct bsecsmfedata {
15022   Int4  max;
15023   Int4  num_at_max;
15024 } BsecSmfeData, PNTR BsecSmfePtr;
15025 
BsecSMFEProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)15026 static Boolean LIBCALLBACK BsecSMFEProc (
15027   SeqFeatPtr sfp,
15028   SeqMgrFeatContextPtr context
15029 )
15030 
15031 
15032 {
15033   BsecSmfePtr  bsp;
15034   Int4         len;
15035 
15036   if (sfp == NULL || context == NULL) return TRUE;
15037   bsp = context->userdata;
15038   if (bsp == NULL) return TRUE;
15039 
15040   len = SeqLocLen (sfp->location);
15041   if (len < bsp->max) {
15042     bsp->max = len;
15043     bsp->num_at_max = 1;
15044   } else if (len == bsp->max) {
15045     (bsp->num_at_max)++;
15046   }
15047 
15048   return TRUE;
15049 }
15050 
RemoveUnnecessaryGeneXrefs(SeqFeatPtr sfp,Pointer userdata)15051 NLM_EXTERN void RemoveUnnecessaryGeneXrefs (
15052   SeqFeatPtr sfp,
15053   Pointer userdata
15054 )
15055 
15056 {
15057   BsecSmfeData         bsd;
15058   SeqFeatPtr           cds;
15059   Int2                 count;
15060   SeqFeatXrefPtr       curr, next;
15061   SeqMgrFeatContext    fcontext;
15062   GeneRefPtr           grp, grpx;
15063   SeqFeatXrefPtr PNTR  last;
15064   BioseqPtr            prd;
15065   SeqFeatPtr           sfpx;
15066   CharPtr              syn1, syn2;
15067 
15068   if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) return;
15069   grp = SeqMgrGetGeneXref (sfp);
15070   if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return;
15071 
15072   grpx = NULL;
15073   sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
15074   if (sfpx != NULL) {
15075     if (sfpx->data.choice != SEQFEAT_GENE) return;
15076     grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
15077   } else {
15078     prd = BioseqFindFromSeqLoc (sfp->location);
15079     if (prd != NULL && ISA_aa (prd->mol)) {
15080       cds = SeqMgrGetCDSgivenProduct (prd, NULL);
15081       if (cds != NULL) {
15082         grpx = SeqMgrGetGeneXref (cds);
15083         if (grpx == NULL) {
15084           sfpx = SeqMgrGetOverlappingGene (cds->location, &fcontext);
15085           if (sfpx != NULL && sfpx->data.choice == SEQFEAT_GENE) {
15086             grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
15087           }
15088         }
15089       }
15090     }
15091   }
15092   if (grpx == NULL || SeqMgrGeneIsSuppressed (grp)) return;
15093 
15094   if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
15095     if (StringICmp (grp->locus_tag, grpx->locus_tag) != 0) return;
15096   } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
15097     if (StringICmp (grp->locus, grpx->locus) != 0) return;
15098   } else if (grp->syn != NULL && grpx->syn != NULL) {
15099     syn1 = (CharPtr) grp->syn->data.ptrvalue;
15100     syn2 = (CharPtr) grpx->syn->data.ptrvalue;
15101     if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)) {
15102       if (StringICmp (syn1, syn2) != 0) return;
15103     }
15104   }
15105 
15106   MemSet ((Pointer) &bsd, 0, sizeof (BsecSmfeData));
15107   bsd.max = INT4_MAX;
15108   bsd.num_at_max = 0;
15109   count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE,
15110                                            NULL, 0, LOCATION_SUBSET,
15111                                            (Pointer) &bsd, BsecSMFEProc);
15112 
15113   if (bsd.num_at_max < 2) {
15114     last = (SeqFeatXrefPtr PNTR) &(sfp->xref);
15115     curr = sfp->xref;
15116     while (curr != NULL) {
15117       next = curr->next;
15118       if (curr->data.choice == SEQFEAT_GENE) {
15119         *last = next;
15120         curr->next = NULL;
15121         SeqFeatXrefFree (curr);
15122       } else {
15123         last = &(curr->next);
15124       }
15125       curr = next;
15126     }
15127   }
15128 }
15129 
15130 //LCOV_EXCL_START
/* Feature callback (userdata unused) that puts the sortable parts of one
   feature in order: GenBank qualifiers, db_xrefs, the citation set and,
   for coding regions, the code breaks. */
static void SortSeqFeatFields (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  CdRegionPtr  cdregion;
  ValNodePtr   citset;

  if (sfp == NULL) return;

  /* legal qualifiers are ordered first, then the illegal-qualifier pass runs on the result */
  sfp->qual = SortIllegalGBQuals (SortFeatureGBQuals (sfp->qual));

  sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);

  citset = sfp->cit;
  if (citset != NULL && citset->data.ptrvalue) {
    citset->data.ptrvalue = ValNodeSort ((ValNodePtr) citset->data.ptrvalue, SortCits);
  }

  if (sfp->data.choice != SEQFEAT_CDREGION) return;
  cdregion = (CdRegionPtr) sfp->data.value.ptrvalue;
  if (cdregion == NULL) return;
  cdregion->code_break = SortCodeBreaks (sfp, cdregion->code_break);
}
15160 
/* BioSource callback (userdata unused): sorts the organism db_xrefs, sorts
   the organism synonyms and runs them through UniqueValNode, sorts the
   modifiers of every OrgName in the chain, and sorts the subsources. */
static void SortBioSourceFields (
  BioSourcePtr biop,
  Pointer userdata
)

{
  OrgNamePtr  orgname;
  OrgRefPtr   orgref;

  if (biop == NULL) return;

  orgref = biop->org;
  if (orgref != NULL) {
    orgref->db = ValNodeSort (orgref->db, SortDbxref);
    orgref->syn = UniqueValNode (ValNodeSort (orgref->syn, SortVnpByString));

    orgname = orgref->orgname;
    while (orgname != NULL) {
      orgname->mod = SortOrgModList (orgname->mod);
      orgname = orgname->next;
    }
  }

  biop->subtype = SortSubSourceList (biop->subtype);
}
15186 
SortSeqEntryQualifiers(SeqEntryPtr sep)15187 NLM_EXTERN void SortSeqEntryQualifiers (
15188   SeqEntryPtr sep
15189 )
15190 
15191 {
15192   if (sep == NULL) return;
15193 
15194   VisitFeaturesInSep (sep, NULL, SortSeqFeatFields);
15195   VisitBioSourcesInSep (sep, NULL, SortBioSourceFields);
15196 }
15197 //LCOV_EXCL_STOP
15198 
15199 /* end BasicSeqEntryCleanup section */
15200 
CDSPartialsFromTranslation(SeqFeatPtr sfp,Pointer userdata)15201 NLM_EXTERN void CDSPartialsFromTranslation (SeqFeatPtr sfp, Pointer userdata)
15202 
15203 {
15204   Int4          i;
15205   Int4          len;
15206   ByteStorePtr  newprot;
15207   Boolean       partial5 = FALSE;
15208   Boolean       partial3 = TRUE;
15209   CharPtr       protseq;
15210   Int2          residue;
15211 
15212   if (sfp == NULL) return;
15213   if (sfp->data.choice != SEQFEAT_CDREGION) return;
15214 
15215   newprot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, NULL, FALSE);
15216   if (newprot == NULL) return;
15217 
15218   protseq = BSMerge (newprot, NULL);
15219   if (protseq != NULL) {
15220     len = StringLen (protseq);
15221 
15222     for (i = 0; i < len; i++) {
15223       residue = protseq [i];
15224       if (i == 0 && residue == '-') {
15225         partial5 = TRUE;
15226       }
15227       if (i == len - 1 && residue == '*') {
15228         partial3 = FALSE;
15229       }
15230     }
15231 
15232     MemFree (protseq);
15233 
15234     SetSeqLocPartial (sfp->location, partial5, partial3);
15235     sfp->partial = (Boolean) (partial5 || partial3);
15236   }
15237 
15238   BSFree (newprot);
15239 }
15240 
CodingRegionPartialsFromTranslation(SeqEntryPtr sep)15241 NLM_EXTERN void CodingRegionPartialsFromTranslation (SeqEntryPtr sep)
15242 
15243 {
15244   VisitFeaturesInSep (sep, NULL, CDSPartialsFromTranslation);
15245 }
15246 
ImposeGenePartials(SeqFeatPtr sfp,Pointer userdata)15247 NLM_EXTERN void ImposeGenePartials (SeqFeatPtr sfp, Pointer userdata)
15248 
15249 {
15250   BioseqPtr          bsp;
15251   SeqMgrFeatContext  fcontext, gcontext;
15252   SeqFeatPtr         feat, longest = NULL;
15253   Int4               len, min = INT4_MAX;
15254   Boolean            new_partial, partial5, partial3;
15255 
15256   if (sfp == NULL) return;
15257   if (sfp->data.choice != SEQFEAT_GENE) return;
15258 
15259   bsp = BioseqFindFromSeqLoc (sfp->location);
15260   if (bsp == NULL) return;
15261 
15262   if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &gcontext) != sfp) return;
15263 
15264   feat = SeqMgrGetDesiredFeature (0, bsp, 0, gcontext.index + 1, NULL, &fcontext);
15265   while (feat != NULL && gcontext.right >= fcontext.left) {
15266     len = TestFeatOverlap(feat, sfp, CONTAINED_WITHIN);
15267     if (len >= 0) {
15268       if (len < min) {
15269         min = len;
15270         longest = feat;
15271       }
15272     }
15273     feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
15274   }
15275 
15276   if (longest != NULL) {
15277     CheckSeqLocForPartial (longest->location, &partial5, &partial3);
15278     new_partial = (Boolean) (longest->partial || partial5 || partial3);
15279     SetSeqLocPartial (sfp->location, partial5, partial3);
15280     sfp->partial = new_partial;
15281   }
15282 }
15283 
ImposeCDSPartials(SeqFeatPtr sfp,Pointer userdata)15284 NLM_EXTERN void ImposeCDSPartials (SeqFeatPtr sfp, Pointer userdata)
15285 
15286 {
15287   SeqFeatPtr  mrna;
15288   Boolean     new_partial, partial5, partial3;
15289 
15290   if (sfp == NULL) return;
15291   if (sfp->data.choice != SEQFEAT_CDREGION) return;
15292 
15293   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15294   new_partial = (Boolean) (sfp->partial || partial5 || partial3);
15295   if (new_partial != sfp->partial) {
15296     sfp->partial = new_partial;
15297   }
15298 
15299   mrna = GetmRNAforCDS (sfp);
15300   if (mrna != NULL) {
15301     SetSeqLocPartial (mrna->location, partial5, partial3);
15302     mrna->partial = new_partial;
15303   }
15304 }
15305 
ImposeCodingRegionPartials(SeqEntryPtr sep)15306 NLM_EXTERN void ImposeCodingRegionPartials (SeqEntryPtr sep)
15307 
15308 {
15309   VisitFeaturesInSep (sep, NULL, ImposeCDSPartials);
15310   VisitFeaturesInSep (sep, NULL, ImposeGenePartials);
15311 }
15312 
ResynchCDSPartials(SeqFeatPtr sfp,Pointer userdata)15313 NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
15314 
15315 {
15316   SeqFeatPtr   bestprot;
15317   BioseqPtr    bsp;
15318   MolInfoPtr   mip;
15319   Boolean      partial5;
15320   Boolean      partial3;
15321   ProtRefPtr   prp;
15322   SeqEntryPtr  sep;
15323   SeqIdPtr     sip;
15324   SeqLocPtr    slp;
15325   ValNodePtr   vnp;
15326   /* variables for logging */
15327   LogInfoPtr    lip;
15328   CharPtr orig_loc = NULL, new_loc;
15329   Char    id_buf[100];
15330   Boolean new_partial;
15331 
15332   if (sfp->data.choice != SEQFEAT_CDREGION) return;
15333   lip = (LogInfoPtr) userdata;
15334   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15335   new_partial = (Boolean) (sfp->partial || partial5 || partial3);
15336   if (new_partial != sfp->partial) {
15337     sfp->partial = new_partial;
15338     if (lip != NULL) {
15339       lip->data_in_log = TRUE;
15340       if (lip->fp != NULL) {
15341         fprintf (lip->fp, "Changed partial flag for coding region\n");
15342       }
15343     }
15344   }
15345 
15346   /*
15347   slp = SeqLocFindNext (sfp->location, NULL);
15348   if (slp == NULL) return;
15349   */
15350   sip = SeqLocId (sfp->product);
15351   if (sip == NULL) return;
15352   bsp = BioseqFind (sip);
15353   if (bsp == NULL || !ISA_aa (bsp->mol) || bsp->repr != Seq_repr_raw) return;
15354 
15355   bestprot = SeqMgrGetBestProteinFeature (bsp, NULL);
15356   if (bestprot == NULL) {
15357     bestprot = GetBestProteinFeatureUnindexed (sfp->product);
15358   }
15359 
15360   sep = SeqMgrGetSeqEntryForData (bsp);
15361   if (sep == NULL) return;
15362 
15363   /* only synchronize and extend best if unprocessed or preprotein, not mature/signal/transit peptide */
15364   if (bestprot != NULL && bestprot->location != NULL) {
15365     prp = (ProtRefPtr) bestprot->data.value.ptrvalue;
15366     slp = bestprot->location;
15367     if (prp != NULL && prp->processed < 2 && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)) {
15368 
15369       if (lip != NULL) {
15370         orig_loc = SeqLocPrintUseBestID (bestprot->location);
15371       }
15372       slp = NULL;
15373       sip = SeqLocId (bestprot->location);
15374       if (sip != NULL) {
15375         slp = WholeIntervalFromSeqId (sip);
15376       }
15377       if (slp == NULL) {
15378         slp = CreateWholeInterval (sep);
15379       }
15380       SetSeqLocPartial (slp, partial5, partial3);
15381       if (slp != NULL
15382           && (!AsnIoMemComp (slp, bestprot->location, (AsnWriteFunc) SeqLocAsnWrite) || bestprot->partial != sfp->partial)) {
15383         bestprot->location = SeqLocFree (bestprot->location);
15384         bestprot->location = slp;
15385 
15386         bestprot->partial = sfp->partial;
15387         if (lip != NULL) {
15388           new_loc = SeqLocPrintUseBestID (bestprot->location);
15389           lip->data_in_log = TRUE;
15390           if (lip->fp != NULL) {
15391             fprintf (lip->fp, "Synchronized coding region partials for protein feature location at %s\n", orig_loc/*, new_loc*/);
15392           }
15393           new_loc = MemFree (new_loc);
15394         }
15395       } else {
15396         slp = SeqLocFree (slp);
15397       }
15398       orig_loc = MemFree (orig_loc);
15399     }
15400   }
15401 
15402   vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15403   id_buf[0] = 0;
15404   if (vnp == NULL) {
15405     vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15406     if (vnp != NULL) {
15407       mip = MolInfoNew ();
15408       vnp->data.ptrvalue = (Pointer) mip;
15409       if (mip != NULL) {
15410         mip->biomol = 8; /* peptide */
15411         mip->tech = 13; /* concept-trans-author */
15412         if (lip != NULL) {
15413           if (lip->fp != NULL) {
15414             SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15415             fprintf (lip->fp, "Added MolInfo descriptor for %s\n", id_buf);
15416           }
15417           lip->data_in_log = TRUE;
15418         }
15419       }
15420     }
15421   }
15422 
15423   if (vnp != NULL && (mip = (MolInfoPtr) vnp->data.ptrvalue) != NULL) {
15424     if (partial5 && partial3) {
15425       if (mip->completeness != 5) {
15426         mip->completeness = 5;
15427         if (lip != NULL) {
15428           if (lip->fp != NULL) {
15429             if (id_buf[0] == 0) {
15430               SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15431             }
15432             fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15433             lip->data_in_log = TRUE;
15434           }
15435         }
15436       }
15437     } else if (partial5) {
15438       if (mip->completeness != 3) {
15439         mip->completeness = 3;
15440         if (lip != NULL) {
15441           if (lip->fp != NULL) {
15442             if (id_buf[0] == 0) {
15443               SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15444             }
15445             fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15446           }
15447           lip->data_in_log = TRUE;
15448         }
15449       }
15450     } else if (partial3) {
15451       if (mip->completeness != 4) {
15452         mip->completeness = 4;
15453         if (lip != NULL) {
15454           if (lip->fp != NULL) {
15455             if (id_buf[0] == 0) {
15456               SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15457             }
15458             fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15459           }
15460           lip->data_in_log = TRUE;
15461         }
15462       }
15463     } else if (sfp->partial) {
15464       if (mip->completeness != 2) {
15465         mip->completeness = 2;
15466         if (lip != NULL) {
15467           if (lip->fp != NULL) {
15468             if (id_buf[0] == 0) {
15469               SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15470             }
15471             fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15472           }
15473           lip->data_in_log = TRUE;
15474         }
15475       }
15476     } else {
15477       if (mip->completeness != 0 && mip->completeness != 1) {
15478         mip->completeness = 0;
15479         if (lip != NULL) {
15480           if (lip->fp != NULL) {
15481             if (id_buf[0] == 0) {
15482               SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15483             }
15484             fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15485           }
15486           lip->data_in_log = TRUE;
15487         }
15488       }
15489     }
15490   }
15491 }
15492 
15493 
ResynchCodingRegionPartialsEx(SeqEntryPtr sep,FILE * log_fp)15494 NLM_EXTERN Boolean ResynchCodingRegionPartialsEx (SeqEntryPtr sep, FILE *log_fp)
15495 
15496 {
15497   LogInfoData lid;
15498   MemSet (&lid, 0, sizeof (LogInfoData));
15499   lid.fp = log_fp;
15500   VisitFeaturesInSep (sep, &lid, ResynchCDSPartials);
15501   return lid.data_in_log;
15502 }
15503 
ResynchCodingRegionPartials(SeqEntryPtr sep)15504 NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep)
15505 
15506 {
15507   ResynchCodingRegionPartialsEx (sep, NULL);
15508 }
15509 
15510 
ResynchMRNAPartials(SeqFeatPtr sfp,Pointer userdata)15511 NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata)
15512 
15513 {
15514   BioseqPtr    bsp;
15515   MolInfoPtr   mip;
15516   Boolean      partial5;
15517   Boolean      partial3;
15518   RnaRefPtr    rrp;
15519   SeqEntryPtr  sep;
15520   SeqIdPtr     sip;
15521   SeqLocPtr    slp;
15522   ValNodePtr   vnp;
15523 
15524   if (sfp->data.choice != SEQFEAT_RNA) return;
15525   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
15526   if (rrp == NULL || rrp->type != 2) return;
15527   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15528   sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
15529   slp = SeqLocFindNext (sfp->location, NULL);
15530   if (slp == NULL) return;
15531   sip = SeqLocId (sfp->product);
15532   if (sip == NULL) return;
15533   bsp = BioseqFind (sip);
15534   if (bsp != NULL && ISA_na (bsp->mol) && bsp->repr == Seq_repr_raw) {
15535     sep = SeqMgrGetSeqEntryForData (bsp);
15536     if (sep == NULL) return;
15537     vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15538     if (vnp == NULL) {
15539       vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15540       if (vnp != NULL) {
15541         mip = MolInfoNew ();
15542         vnp->data.ptrvalue = (Pointer) mip;
15543         if (mip != NULL) {
15544           mip->biomol = 3; /* mRNA */
15545           mip->tech = 1; /* standard */
15546         }
15547       }
15548     }
15549     if (vnp != NULL) {
15550       mip = (MolInfoPtr) vnp->data.ptrvalue;
15551       if (mip != NULL) {
15552         if (partial5 && partial3) {
15553           mip->completeness = 5;
15554         } else if (partial5) {
15555           mip->completeness = 3;
15556         } else if (partial3) {
15557           mip->completeness = 4;
15558         } else if (sfp->partial) {
15559           mip->completeness = 2;
15560         } else {
15561           mip->completeness = 0;
15562         }
15563       }
15564     }
15565   }
15566 }
15567 
ResynchMessengerRNAPartials(SeqEntryPtr sep)15568 NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep)
15569 
15570 {
15571   VisitFeaturesInSep (sep, NULL, ResynchMRNAPartials);
15572 }
15573 
ResynchPeptidePartials(SeqFeatPtr sfp,Pointer userdata)15574 NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata)
15575 
15576 {
15577   SeqFeatPtr   bestprot;
15578   BioseqPtr    bsp;
15579   MolInfoPtr   mip;
15580   Boolean      partial5;
15581   Boolean      partial3;
15582   ProtRefPtr   prp;
15583   SeqEntryPtr  sep;
15584   SeqIdPtr     sip;
15585   SeqLocPtr    slp;
15586   ValNodePtr   vnp;
15587 
15588   if (sfp->data.choice != SEQFEAT_PROT) return;
15589   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
15590   if (prp == NULL) return;
15591   if (prp->processed < 1 || prp->processed > 5) return;
15592   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15593   sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
15594   /*
15595   slp = SeqLocFindNext (sfp->location, NULL);
15596   if (slp == NULL) return;
15597   */
15598   sip = SeqLocId (sfp->product);
15599   if (sip == NULL) return;
15600   bsp = BioseqFind (sip);
15601   if (bsp != NULL && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_raw) {
15602     sep = SeqMgrGetSeqEntryForData (bsp);
15603     if (sep == NULL) return;
15604     bestprot = SeqMgrGetBestProteinFeature (bsp, NULL);
15605     if (bestprot == NULL) {
15606       bestprot = GetBestProteinFeatureUnindexed (sfp->product);
15607     }
15608     if (bestprot != NULL && bestprot->location != NULL) {
15609       /* only synchronize and extend best if unprocessed or preprotein, not mature/signal/transit peptide */
15610       prp = (ProtRefPtr) bestprot->data.value.ptrvalue;
15611       slp = bestprot->location;
15612       if (prp != NULL && prp->processed < 2 && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)) {
15613         slp = NULL;
15614         sip = SeqLocId (bestprot->location);
15615         if (sip != NULL) {
15616           slp = WholeIntervalFromSeqId (sip);
15617         }
15618         if (slp == NULL) {
15619           slp = CreateWholeInterval (sep);
15620         }
15621         if (slp != NULL) {
15622           bestprot->location = SeqLocFree (bestprot->location);
15623           bestprot->location = slp;
15624         }
15625         SetSeqLocPartial (bestprot->location, partial5, partial3);
15626         bestprot->partial = sfp->partial;
15627       }
15628     }
15629     vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15630     if (vnp == NULL) {
15631       vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15632       if (vnp != NULL) {
15633         mip = MolInfoNew ();
15634         vnp->data.ptrvalue = (Pointer) mip;
15635         if (mip != NULL) {
15636           mip->biomol = 8;
15637           mip->tech = 13;
15638         }
15639       }
15640     }
15641     if (vnp != NULL) {
15642       mip = (MolInfoPtr) vnp->data.ptrvalue;
15643       if (mip != NULL) {
15644         if (partial5 && partial3) {
15645           mip->completeness = 5;
15646         } else if (partial5) {
15647           mip->completeness = 3;
15648         } else if (partial3) {
15649           mip->completeness = 4;
15650         } else if (sfp->partial) {
15651           mip->completeness = 2;
15652         } else {
15653           mip->completeness = 0;
15654         }
15655       }
15656     }
15657   }
15658 }
15659 
ResynchProteinPartials(SeqEntryPtr sep)15660 NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep)
15661 
15662 {
15663   VisitFeaturesInSep (sep, NULL, ResynchPeptidePartials);
15664 }
15665 
15666 /* SeqIdStripLocus removes the SeqId.name field if accession is set */
15667 
SeqIdStripLocus(SeqIdPtr sip)15668 NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip)
15669 
15670 {
15671   TextSeqIdPtr  tip;
15672 
15673   if (sip != NULL) {
15674     switch (sip->choice) {
15675       case SEQID_GENBANK :
15676       case SEQID_EMBL :
15677       case SEQID_DDBJ :
15678       case SEQID_OTHER :
15679       case SEQID_TPG:
15680       case SEQID_TPE:
15681       case SEQID_TPD:
15682       case SEQID_GPIPE:
15683         tip = (TextSeqIdPtr) sip->data.ptrvalue;
15684         if (tip != NULL) {
15685           if (! HasNoText (tip->accession)) {
15686             tip->name = MemFree (tip->name);
15687           }
15688         }
15689         break;
15690       default :
15691         break;
15692     }
15693   }
15694   return sip;
15695 }
15696 
15697 //LCOV_EXCL_START
StripLocusFromSeqLoc(SeqLocPtr location)15698 NLM_EXTERN SeqLocPtr StripLocusFromSeqLoc (SeqLocPtr location)
15699 
15700 {
15701   SeqLocPtr      loc;
15702   SeqLocPtr      next;
15703   PackSeqPntPtr  psp;
15704   SeqBondPtr     sbp;
15705   SeqIntPtr      sinp;
15706   SeqIdPtr       sip;
15707   SeqLocPtr      slp;
15708   SeqPntPtr      spp;
15709 
15710   if (location == NULL) return NULL;
15711   slp = SeqLocFindNext (location, NULL);
15712   while (slp != NULL) {
15713     next = SeqLocFindNext (location, slp);
15714     switch (slp->choice) {
15715       case SEQLOC_EMPTY :
15716       case SEQLOC_WHOLE :
15717         sip = (SeqIdPtr) slp->data.ptrvalue;
15718         SeqIdStripLocus (sip);
15719         break;
15720       case SEQLOC_INT :
15721         sinp = (SeqIntPtr) slp->data.ptrvalue;
15722         if (sinp != NULL) {
15723           SeqIdStripLocus (sinp->id);
15724         }
15725         break;
15726       case SEQLOC_PACKED_INT :
15727       case SEQLOC_MIX :
15728       case SEQLOC_EQUIV :
15729         loc = (SeqLocPtr) slp->data.ptrvalue;
15730         while (loc != NULL) {
15731           sip = SeqLocId (loc);
15732           SeqIdStripLocus (sip);
15733           loc = loc->next;
15734         }
15735         break;
15736       case SEQLOC_BOND :
15737         sbp = (SeqBondPtr) slp->data.ptrvalue;
15738         if (sbp != NULL) {
15739           spp = sbp->a;
15740           if (spp != NULL) {
15741             SeqIdStripLocus (spp->id);
15742           }
15743           spp = sbp->b;
15744           if (spp != NULL) {
15745             SeqIdStripLocus (spp->id);
15746           }
15747         }
15748         break;
15749       case SEQLOC_PNT :
15750         spp = (SeqPntPtr) slp->data.ptrvalue;
15751         if (spp != NULL) {
15752           SeqIdStripLocus (spp->id);
15753         }
15754         break;
15755       case SEQLOC_PACKED_PNT :
15756         psp = (PackSeqPntPtr) slp->data.ptrvalue;
15757         if (psp != NULL) {
15758           SeqIdStripLocus (psp->id);
15759         }
15760         break;
15761       default :
15762         break;
15763     }
15764     slp = next;
15765   }
15766   return location;
15767 }
15768 
/* SeqEntryExplore callback (mydata, index and indent are unused).  For
   the Bioseq or Bioseq-set held by sep, visits each annotation of type 1
   with data, treats that data as a feature chain, and strips the locus
   from every feature's location and product. */
static void GetRidOfLocusCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr  annot;
  SeqFeatPtr   feat;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  if (IS_Bioseq (sep)) {
    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
  } else if (IS_Bioseq_set (sep)) {
    annot = ((BioseqSetPtr) sep->data.ptrvalue)->annot;
  } else {
    return;
  }

  for (; annot != NULL; annot = annot->next) {
    if (annot->type != 1 || annot->data == NULL) continue;
    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
      StripLocusFromSeqLoc (feat->location);
      StripLocusFromSeqLoc (feat->product);
    }
  }
}
15798 
GetRidOfLocusInSeqIds(Uint2 entityID,SeqEntryPtr sep)15799 NLM_EXTERN void GetRidOfLocusInSeqIds (Uint2 entityID, SeqEntryPtr sep)
15800 
15801 {
15802   if (entityID < 1 && sep == NULL) return;
15803   if (entityID > 0 && sep == NULL) {
15804     sep = GetTopSeqEntryForEntityID (entityID);
15805   }
15806   if (sep == NULL) return;
15807   SeqEntryExplore (sep, NULL, GetRidOfLocusCallback);
15808 }
15809 //LCOV_EXCL_STOP
15810 
15811 /* Mac can now use static parse tables by using
15812    Make Strings Read-Only and Store Static Data in TOC
15813 #ifdef OS_MAC
15814 #define ASNLOAD_NEEDED 1
15815 #endif
15816 */
/* ASNLOAD_NEEDED is defined only when OS_DOS or WIN16 is defined; the
   nearby CheckAsnloadPath that tests it is currently commented out. */
#if defined(OS_DOS) || defined(WIN16)
#define ASNLOAD_NEEDED 1
#endif
15820 
FileExists(CharPtr dirname,CharPtr subname,CharPtr filename)15821 static Boolean FileExists (CharPtr dirname, CharPtr subname, CharPtr filename)
15822 
15823 {
15824   Char  path [PATH_MAX];
15825 
15826   StringNCpy_0 (path, dirname, sizeof (path));
15827   FileBuildPath (path, subname, NULL);
15828   FileBuildPath (path, NULL, filename);
15829   return (Boolean) (FileLength (path) > 0);
15830 }
15831 
15832 /*
15833 static Boolean CheckAsnloadPath (CharPtr dirname, CharPtr subdir)
15834 
15835 {
15836 #ifdef ASNLOAD_NEEDED
15837   Char  fname [16];
15838   int   i;
15839 
15840   for (i = 60; i <= 99; ++i) {
15841     sprintf (fname, "asnmedli.l%02d", (int) i);
15842     if (FileExists (dirname, subdir, fname)) {
15843       return TRUE;
15844     }
15845   }
15846   return FALSE;
15847 #else
15848   return TRUE;
15849 #endif
15850 }
15851 */
15852 
CheckDataPath(CharPtr dirname,CharPtr subdir)15853 static Boolean CheckDataPath (CharPtr dirname, CharPtr subdir)
15854 
15855 {
15856   if (FileExists (dirname, subdir, "seqcode.val")) return TRUE;
15857   return (Boolean) (FileExists (dirname, subdir, "objprt.prt"));
15858 }
15859 
CheckErrMsgPath(CharPtr dirname,CharPtr subdir)15860 static Boolean CheckErrMsgPath (CharPtr dirname, CharPtr subdir)
15861 
15862 {
15863   return (Boolean) (FileExists (dirname, subdir, "valid.msg"));
15864 }
15865 
15866 //LCOV_EXCL_START
/* Stores the path dirname/subname as the transient application parameter
   identified by file, section and type. */
static void SetTransientPath (CharPtr dirname, CharPtr subname, CharPtr file,
                              CharPtr section, CharPtr type)

{
  Char  target [PATH_MAX];

  StringNCpy_0 (target, dirname, sizeof (target));
  FileBuildPath (target, subname, NULL);

  TransientSetAppParam (file, section, type, target);
}
15877 
UseLocalAsnloadDataAndErrMsg(void)15878 NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
15879 
15880 {
15881   Boolean  dataFound;
15882   Char     path [PATH_MAX];
15883   Char     appPath[PATH_MAX];
15884   CharPtr  ptr;
15885 
15886   ProgramPath (appPath, sizeof (appPath));
15887   StrCpy(path, appPath);
15888   /* data a sibling of our application? */
15889   ptr = StringRChr (path, DIRDELIMCHR);
15890   if (ptr != NULL) {
15891     ptr++;
15892     *ptr = '\0';
15893   }
15894   dataFound = CheckDataPath (path, "data");
15895   if (! (dataFound)) {
15896   /* data an uncle of our application? */
15897     if (ptr != NULL) {
15898       ptr--;
15899       *ptr = '\0';
15900       ptr = StringRChr (path, DIRDELIMCHR);
15901       if (ptr != NULL) {
15902         ptr++;
15903         *ptr = '\0';
15904       }
15905       dataFound = CheckDataPath (path, "data");
15906     }
15907   }
15908 #ifdef OS_UNIX_DARWIN
15909   if (! (dataFound) && IsApplicationPackage (appPath)) {
15910       /* is data inside our application within Contents/Resources? */
15911       StrCpy (path, appPath);
15912       FileBuildPath (path, "Contents", NULL);
15913       FileBuildPath (path, "Resources", NULL);
15914       dataFound = CheckDataPath (path, "data");
15915       if (! dataFound) {
15916         StrCpy (path, appPath);
15917         ptr = StringStr (path, "/ncbi/build/");
15918         if (ptr != NULL) {
15919           /* see if running under older Xcode 3 build environment */
15920           ptr [5] = '\0';
15921           dataFound = CheckDataPath (path, "data");
15922         }
15923       }
15924       if (! dataFound) {
15925         StrCpy (path, appPath);
15926         ptr = StringStr (path, "/ncbi/make/");
15927         if (ptr != NULL) {
15928           /* see if running under newer Xcode 3 build environment */
15929           ptr [5] = '\0';
15930           dataFound = CheckDataPath (path, "data");
15931         }
15932       }
15933       if (! dataFound) {
15934           StrCpy (path, appPath);
15935           ptr = StringStr (path, "/Library/Developer/");
15936           if (ptr != NULL) {
15937               /* see if running under Xcode 4 build environment */
15938               ptr [19] = '\0';
15939               dataFound = CheckDataPath (path, "data");
15940           }
15941       }
15942   }
15943 #endif
15944   if (dataFound) {
15945     SetTransientPath (path, "asnload", "NCBI", "NCBI", "ASNLOAD");
15946     SetTransientPath (path, "data", "NCBI", "NCBI", "DATA");
15947     if (CheckErrMsgPath (path, "errmsg")) {
15948       SetTransientPath (path, "errmsg", "NCBI", "ErrorProcessing", "MsgPath");
15949       TransientSetAppParam ("NCBI", "ErrorProcessing", "EO_BEEP", "No");
15950     }
15951     return TRUE;
15952   }
15953   return FALSE;
15954 }
15955 
CreateWholeInterval(SeqEntryPtr sep)15956 NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep)
15957 
15958 {
15959   BioseqPtr  bsp;
15960   SeqIntPtr  sip;
15961   SeqLocPtr  slp;
15962 
15963   slp = NULL;
15964   if (sep != NULL && sep->choice == 1 && sep->data.ptrvalue != NULL) {
15965     bsp = (BioseqPtr) sep->data.ptrvalue;
15966     slp = ValNodeNew (NULL);
15967     if (slp != NULL) {
15968       sip = SeqIntNew ();
15969       if (sip != NULL) {
15970         slp->choice = SEQLOC_INT;
15971         slp->data.ptrvalue = (Pointer) sip;
15972         sip->from = 0;
15973         sip->to = bsp->length - 1;
15974         if (ISA_na (bsp->mol)) {
15975           sip->strand = Seq_strand_plus;
15976         }
15977         sip->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
15978       }
15979     }
15980   }
15981   return slp;
15982 }
15983 //LCOV_EXCL_STOP
15984 
15985 
WholeIntervalFromSeqId(SeqIdPtr sip)15986 NLM_EXTERN SeqLocPtr WholeIntervalFromSeqId (SeqIdPtr sip)
15987 
15988 {
15989   BioseqPtr  bsp;
15990   SeqIntPtr  sintp;
15991   SeqLocPtr  slp;
15992 
15993   if (sip == NULL) return NULL;
15994   bsp = BioseqFindCore (sip);
15995   if (bsp == NULL) return NULL;
15996   slp = ValNodeNew (NULL);
15997   if (slp == NULL) return NULL;
15998   sintp = SeqIntNew ();
15999   if (sintp == NULL) return NULL;
16000   slp->choice = SEQLOC_INT;
16001   slp->data.ptrvalue = (Pointer) sintp;
16002   sintp->from = 0;
16003   sintp->to = bsp->length - 1;
16004   if (ISA_na (bsp->mol)) {
16005     sintp->strand = Seq_strand_plus;
16006   }
16007   sintp->id = SeqIdStripLocus (SeqIdDup (sip));
16008   return slp;
16009 }
16010 
16011 //LCOV_EXCL_START
FreeAllFuzz(SeqLocPtr location)16012 NLM_EXTERN void FreeAllFuzz (SeqLocPtr location)
16013 
16014 {
16015   SeqIntPtr  sip;
16016   SeqLocPtr  slp;
16017 
16018   if (location == NULL) return;
16019   slp = SeqLocFindNext (location, NULL);
16020   while (slp != NULL) {
16021     if (slp->choice == SEQLOC_INT) {
16022       sip = (SeqIntPtr) slp->data.ptrvalue;
16023       if (sip != NULL) {
16024         sip->if_to = IntFuzzFree (sip->if_to);
16025         sip->if_from = IntFuzzFree (sip->if_from);
16026       }
16027     }
16028     slp = SeqLocFindNext (location, slp);
16029   }
16030 }
16031 //LCOV_EXCL_STOP
16032 
LocationHasNullsBetween(SeqLocPtr location)16033 NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location)
16034 
16035 {
16036   SeqLocPtr  slp;
16037 
16038   if (location == NULL) return FALSE;
16039   slp = SeqLocFindNext (location, NULL);
16040   while (slp != NULL) {
16041     if (slp->choice == SEQLOC_NULL) return TRUE;
16042     slp = SeqLocFindNext (location, slp);
16043   }
16044   return FALSE;
16045 }
16046 
NormalizeNullsBetween(SeqLocPtr location)16047 NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location)
16048 
16049 {
16050   SeqLocPtr  next, tmp, vnp;
16051 
16052   if (location == NULL) return;
16053   if (! LocationHasNullsBetween (location)) return;
16054 
16055   if (location->choice != SEQLOC_MIX) return;
16056   vnp = (ValNodePtr) location->data.ptrvalue;
16057   if (vnp == NULL) return;
16058 
16059   while (vnp != NULL && vnp->next != NULL) {
16060     next = vnp->next;
16061     if (vnp->choice != SEQLOC_NULL && next->choice != SEQLOC_NULL) {
16062       tmp = ValNodeNew (NULL);
16063       if (tmp != NULL) {
16064         tmp->choice = SEQLOC_NULL;
16065         tmp->next = vnp->next;
16066         vnp->next = tmp;
16067       }
16068     }
16069     vnp = next;
16070   }
16071 }
16072 
FindFeatFromFeatDefType(Uint2 subtype)16073 NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype)
16074 
16075 {
16076   switch (subtype) {
16077     case FEATDEF_GENE :
16078       return SEQFEAT_GENE;
16079     case FEATDEF_ORG :
16080       return SEQFEAT_ORG;
16081     case FEATDEF_CDS :
16082       return SEQFEAT_CDREGION;
16083     case FEATDEF_PROT :
16084       return SEQFEAT_PROT;
16085     case FEATDEF_PUB :
16086       return SEQFEAT_PUB;
16087     case FEATDEF_SEQ :
16088       return SEQFEAT_SEQ;
16089     case FEATDEF_REGION :
16090       return SEQFEAT_REGION;
16091     case FEATDEF_COMMENT :
16092       return SEQFEAT_COMMENT;
16093     case FEATDEF_BOND :
16094       return SEQFEAT_BOND;
16095     case FEATDEF_SITE :
16096       return SEQFEAT_SITE;
16097     case FEATDEF_RSITE :
16098       return SEQFEAT_RSITE;
16099     case FEATDEF_USER :
16100       return SEQFEAT_USER;
16101     case FEATDEF_TXINIT :
16102       return SEQFEAT_TXINIT;
16103     case FEATDEF_NUM :
16104       return SEQFEAT_NUM;
16105     case FEATDEF_PSEC_STR :
16106       return SEQFEAT_PSEC_STR;
16107     case FEATDEF_NON_STD_RESIDUE :
16108       return SEQFEAT_NON_STD_RESIDUE;
16109     case FEATDEF_HET :
16110       return SEQFEAT_HET;
16111     case FEATDEF_BIOSRC :
16112       return SEQFEAT_BIOSRC;
16113     default :
16114       if (subtype >= FEATDEF_preRNA && subtype <= FEATDEF_otherRNA) {
16115         return SEQFEAT_RNA;
16116       }
16117       if (subtype == FEATDEF_snoRNA) {
16118         return SEQFEAT_RNA;
16119       }
16120       if (subtype >= FEATDEF_ncRNA && subtype <= FEATDEF_tmRNA) {
16121         return SEQFEAT_RNA;
16122       }
16123       if (subtype >= FEATDEF_preprotein && subtype <= FEATDEF_transit_peptide_aa) {
16124         return SEQFEAT_PROT;
16125       }
16126       if (subtype >= FEATDEF_IMP && subtype <= FEATDEF_site_ref) {
16127         return SEQFEAT_IMP;
16128       }
16129       if (subtype >= FEATDEF_gap && subtype <= FEATDEF_oriT) {
16130         return SEQFEAT_IMP;
16131       }
16132       if (subtype >= FEATDEF_mobile_element && subtype <= FEATDEF_propeptide) {
16133         return SEQFEAT_IMP;
16134       }
16135       if (subtype == FEATDEF_propeptide_aa) {
16136         return SEQFEAT_PROT;
16137       }
16138   }
16139   return 0;
16140 }
16141 
16142 //LCOV_EXCL_START
MakeSeqID(CharPtr str)16143 NLM_EXTERN SeqIdPtr MakeSeqID(CharPtr str)
16144 
16145 {
16146   CharPtr   buf;
16147   Int4      len;
16148   SeqIdPtr  sip;
16149 
16150   sip = NULL;
16151   if (str != NULL && *str != '\0') {
16152     if (StringChr (str, '|') != NULL) {
16153       sip = SeqIdParse (str);
16154     } else {
16155       len = StringLen (str) + 5;
16156       buf = (CharPtr) MemNew (sizeof (Char) * len);
16157       sprintf (buf, "lcl|%s", str);
16158       sip = SeqIdParse (buf);
16159       buf = MemFree (buf);
16160     }
16161   }
16162   return sip;
16163 }
16164 
MakeUniqueSeqID(CharPtr prefix)16165 NLM_EXTERN SeqIdPtr MakeUniqueSeqID (CharPtr prefix)
16166 
16167 {
16168     Char buf[60];
16169     CharPtr tmp;
16170     Int2 ctr;
16171     ValNodePtr newid;
16172     ObjectIdPtr oid;
16173     ValNode vn;
16174     TextSeqId tsi;
16175     ValNodePtr altid;
16176     size_t len;
16177 
16178     altid = &vn;
16179     vn.choice = SEQID_GENBANK;
16180     vn.next = NULL;
16181     vn.data.ptrvalue = &tsi;
16182     tsi.name = NULL;
16183     tsi.accession = NULL;
16184     tsi.release = NULL;
16185     tsi.version = INT2_MIN;
16186 
16187     len = StringLen (prefix);
16188     if (len > 0 && len < 52) {
16189         tmp = StringMove(buf, prefix);
16190     } else {
16191         tmp = StringMove(buf, "tmpseq_");
16192     }
16193 
16194     newid = ValNodeNew(NULL);
16195     oid = ObjectIdNew();
16196     oid->str = buf;   /* allocate this later */
16197     newid->choice = SEQID_LOCAL;
16198     newid->data.ptrvalue = oid;
16199 
16200     tsi.name = buf;   /* check for alternative form */
16201 
16202     for (ctr = 1; ctr < 32000; ctr++)
16203     {
16204         sprintf(tmp, "%d", (int)ctr);
16205         if ((BioseqFindCore(newid) == NULL) && (BioseqFindCore(altid) == NULL))
16206         {
16207             oid->str = StringSave(buf);
16208             return newid;
16209         }
16210     }
16211 
16212     return NULL;
16213 }
16214 
SeqIdFindWorst(SeqIdPtr sip)16215 NLM_EXTERN SeqIdPtr SeqIdFindWorst (SeqIdPtr sip)
16216 
16217 {
16218   Uint1  order [NUM_SEQID];
16219 
16220   SeqIdBestRank (order, NUM_SEQID);
16221   order [SEQID_LOCAL] = 10;
16222   order [SEQID_GENBANK] = 5;
16223   order [SEQID_EMBL] = 5;
16224   order [SEQID_PIR] = 5;
16225   order [SEQID_SWISSPROT] = 5;
16226   order [SEQID_DDBJ] = 5;
16227   order [SEQID_PRF] = 5;
16228   order [SEQID_PDB] = 5;
16229   order [SEQID_TPG] = 5;
16230   order [SEQID_TPE] = 5;
16231   order [SEQID_TPD] = 5;
16232   order [SEQID_GPIPE] = 9;
16233   order [SEQID_NAMED_ANNOT_TRACK] = 9;
16234   order [SEQID_PATENT] = 10;
16235   order [SEQID_OTHER] = 8;
16236   order [SEQID_GENERAL] = 15;
16237   order [SEQID_GIBBSQ] = 15;
16238   order [SEQID_GIBBMT] = 15;
16239   order [SEQID_GIIM] = 20;
16240   order [SEQID_GI] = 20;
16241   return SeqIdSelect (sip, order, NUM_SEQID);
16242 }
16243 
CreateNewFeature(SeqEntryPtr sep,SeqEntryPtr placeHere,Uint1 choice,SeqFeatPtr useThis)16244 NLM_EXTERN SeqFeatPtr CreateNewFeature (SeqEntryPtr sep, SeqEntryPtr placeHere,
16245                              Uint1 choice, SeqFeatPtr useThis)
16246 
16247 {
16248   BioseqPtr     bsp;
16249   BioseqSetPtr  bssp;
16250   SeqFeatPtr    prev;
16251   SeqAnnotPtr   sap;
16252   SeqFeatPtr    sfp;
16253 
16254   if (sep == NULL || sep->choice != 1) return NULL;
16255   sfp = NULL;
16256   bsp = NULL;
16257   bssp = NULL;
16258   if (placeHere == NULL) {
16259     placeHere = sep;
16260   }
16261   if (placeHere != NULL && placeHere->data.ptrvalue != NULL) {
16262     if (placeHere->choice == 1) {
16263       bsp = (BioseqPtr) placeHere->data.ptrvalue;
16264       sap = bsp->annot;
16265       while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
16266         sap = sap->next;
16267       }
16268       if (sap == NULL) {
16269         sap = SeqAnnotNew ();
16270         if (sap != NULL) {
16271           sap->type = 1;
16272           sap->next = bsp->annot;
16273           bsp->annot = sap;
16274         }
16275         sap = bsp->annot;
16276       }
16277     } else if (placeHere->choice == 2) {
16278       bssp = (BioseqSetPtr) placeHere->data.ptrvalue;
16279       sap = bssp->annot;
16280       while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
16281         sap = sap->next;
16282       }
16283       if (sap == NULL) {
16284         sap = SeqAnnotNew ();
16285         if (sap != NULL) {
16286           sap->type = 1;
16287           sap->next = bssp->annot;
16288           bssp->annot = sap;
16289         }
16290         sap = bssp->annot;
16291       }
16292     } else {
16293       return NULL;
16294     }
16295     if (sap != NULL) {
16296       bsp = (BioseqPtr) sep->data.ptrvalue;
16297       if (useThis != NULL) {
16298         sfp = useThis;
16299       } else {
16300         sfp = SeqFeatNew ();
16301       }
16302       if (sap->data != NULL) {
16303         prev = sap->data;
16304         while (prev->next != NULL) {
16305           prev = prev->next;
16306         }
16307         prev->next = sfp;
16308       } else {
16309         sap->data = (Pointer) sfp;
16310       }
16311       if (sfp != NULL) {
16312         sfp->data.choice = choice;
16313         if (useThis == NULL) {
16314           sfp->location = CreateWholeInterval (sep);
16315         }
16316       }
16317     }
16318   }
16319   return sfp;
16320 }
16321 
CreateNewFeatureOnBioseq(BioseqPtr bsp,Uint1 choice,SeqLocPtr slp)16322 NLM_EXTERN SeqFeatPtr CreateNewFeatureOnBioseq (BioseqPtr bsp, Uint1 choice, SeqLocPtr slp)
16323 
16324 {
16325   SeqEntryPtr  sep;
16326   SeqFeatPtr   sfp;
16327 
16328   if (bsp == NULL) return NULL;
16329   sep = SeqMgrGetSeqEntryForData (bsp);
16330   if (sep == NULL) return NULL;
16331   sfp = CreateNewFeature (sep, NULL, choice, NULL);
16332   if (sfp == NULL) return NULL;
16333   if (slp != NULL) {
16334     sfp->location = SeqLocFree (sfp->location);
16335     sfp->location = AsnIoMemCopy (slp, (AsnReadFunc) SeqLocAsnRead,
16336                                   (AsnWriteFunc) SeqLocAsnWrite);
16337   }
16338   return sfp;
16339 }
16340 
CreateNewDescriptor(SeqEntryPtr sep,Uint1 choice)16341 NLM_EXTERN ValNodePtr CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice)
16342 
16343 {
16344   BioseqPtr     bsp;
16345   BioseqSetPtr  bssp;
16346   Uint1         _class;
16347   ValNodePtr    descr;
16348   SeqEntryPtr   seqentry;
16349   ValNodePtr    vnp;
16350 
16351   vnp = NULL;
16352   if (sep != NULL) {
16353     descr = NULL;
16354     vnp = NULL;
16355     bsp = NULL;
16356     bssp = NULL;
16357     seqentry = sep;
16358     while (seqentry != NULL) {
16359       if (seqentry->choice == 1) {
16360         bsp = (BioseqPtr) seqentry->data.ptrvalue;
16361         if (bsp != NULL) {
16362           descr = bsp->descr;
16363           vnp = SeqDescrNew (descr);
16364           if (descr == NULL) {
16365             bsp->descr = vnp;
16366           }
16367         }
16368         seqentry = NULL;
16369       } else if (seqentry->choice == 2) {
16370         bssp = (BioseqSetPtr) seqentry->data.ptrvalue;
16371         if (bssp != NULL) {
16372           _class = bssp->_class;
16373           if (_class == 7) {
16374             descr = bssp->descr;
16375             vnp = SeqDescrNew (descr);
16376             if (descr == NULL) {
16377               bssp->descr = vnp;
16378             }
16379             seqentry = NULL;
16380           } else if ((_class >= 5 && _class <= 8) || _class == 11 /* || _class == BioseqseqSet_class_gen_prod_set */) {
16381             seqentry = bssp->seq_set;
16382           } else {
16383             descr = bssp->descr;
16384             vnp = SeqDescrNew (descr);
16385             if (descr == NULL) {
16386               bssp->descr = vnp;
16387             }
16388             seqentry = NULL;
16389           }
16390         } else {
16391           seqentry = NULL;
16392         }
16393       } else {
16394         seqentry = NULL;
16395       }
16396     }
16397     if (vnp != NULL) {
16398       vnp->choice = choice;
16399     }
16400   }
16401   return vnp;
16402 }
16403 
16404 
CreateNewDescriptorOnBioseq(BioseqPtr bsp,Uint1 choice)16405 NLM_EXTERN ValNodePtr CreateNewDescriptorOnBioseq (BioseqPtr bsp, Uint1 choice)
16406 
16407 {
16408   SeqEntryPtr  sep;
16409 
16410   if (bsp == NULL) return NULL;
16411   sep = SeqMgrGetSeqEntryForData (bsp);
16412   if (sep == NULL) return NULL;
16413   return CreateNewDescriptor (sep, choice);
16414 }
16415 
16416 
16417 /* common functions to scan binary ASN.1 file of entire release as Bioseq-set */
16418 
VisitSeqIdList(SeqIdPtr sip,Pointer userdata,VisitSeqIdFunc callback)16419 static Int4 VisitSeqIdList (SeqIdPtr sip, Pointer userdata, VisitSeqIdFunc callback)
16420 
16421 {
16422   Int4  index = 0;
16423 
16424   while (sip != NULL) {
16425     if (callback != NULL) {
16426       callback (sip, userdata);
16427     }
16428     index++;
16429     sip = sip->next;
16430   }
16431   return index;
16432 }
16433 
VisitSeqIdsInSeqLoc(SeqLocPtr slp,Pointer userdata,VisitSeqIdFunc callback)16434 NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqIdFunc callback)
16435 
16436 {
16437   Int4           index = 0;
16438   SeqLocPtr      loc;
16439   PackSeqPntPtr  psp;
16440   SeqBondPtr     sbp;
16441   SeqIntPtr      sinp;
16442   SeqIdPtr       sip;
16443   SeqPntPtr      spp;
16444 
16445   if (slp == NULL) return index;
16446 
16447   while (slp != NULL) {
16448     switch (slp->choice) {
16449       case SEQLOC_NULL :
16450         break;
16451       case SEQLOC_EMPTY :
16452       case SEQLOC_WHOLE :
16453         sip = (SeqIdPtr) slp->data.ptrvalue;
16454         index += VisitSeqIdList (sip, userdata, callback);
16455         break;
16456       case SEQLOC_INT :
16457         sinp = (SeqIntPtr) slp->data.ptrvalue;
16458         if (sinp != NULL) {
16459           sip = sinp->id;
16460           index += VisitSeqIdList (sip, userdata, callback);
16461         }
16462         break;
16463       case SEQLOC_PNT :
16464         spp = (SeqPntPtr) slp->data.ptrvalue;
16465         if (spp != NULL) {
16466           sip = spp->id;
16467           index += VisitSeqIdList (sip, userdata, callback);
16468         }
16469         break;
16470       case SEQLOC_PACKED_PNT :
16471         psp = (PackSeqPntPtr) slp->data.ptrvalue;
16472         if (psp != NULL) {
16473           sip = psp->id;
16474           index += VisitSeqIdList (sip, userdata, callback);
16475         }
16476         break;
16477       case SEQLOC_PACKED_INT :
16478       case SEQLOC_MIX :
16479       case SEQLOC_EQUIV :
16480         loc = (SeqLocPtr) slp->data.ptrvalue;
16481         while (loc != NULL) {
16482           index += VisitSeqIdsInSeqLoc (loc, userdata, callback);
16483           loc = loc->next;
16484         }
16485         break;
16486       case SEQLOC_BOND :
16487         sbp = (SeqBondPtr) slp->data.ptrvalue;
16488         if (sbp != NULL) {
16489           spp = (SeqPntPtr) sbp->a;
16490           if (spp != NULL) {
16491             sip = spp->id;
16492             index += VisitSeqIdList (sip, userdata, callback);
16493           }
16494           spp = (SeqPntPtr) sbp->b;
16495           if (spp != NULL) {
16496             sip = spp->id;
16497             index += VisitSeqIdList (sip, userdata, callback);
16498           }
16499         }
16500         break;
16501       case SEQLOC_FEAT :
16502         break;
16503       default :
16504         break;
16505     }
16506     slp = slp->next;
16507   }
16508 
16509   return index;
16510 }
16511 
VisitSeqIdsInBioseq(BioseqPtr bsp,Pointer userdata,VisitSeqIdFunc callback)16512 NLM_EXTERN Int4 VisitSeqIdsInBioseq (BioseqPtr bsp, Pointer userdata, VisitSeqIdFunc callback)
16513 
16514 {
16515   Int4  index = 0;
16516 
16517   if (bsp == NULL) return index;
16518 
16519   if (bsp->id != NULL) {
16520     index += VisitSeqIdList (bsp->id, userdata, callback);
16521   }
16522 
16523   return index;
16524 }
16525 
VisitSeqIdsInSeqFeat(SeqFeatPtr sfp,Pointer userdata,VisitSeqIdFunc callback)16526 NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback)
16527 
16528 {
16529   CodeBreakPtr  cbp;
16530   CdRegionPtr   crp;
16531   Int4          index = 0;
16532   RnaRefPtr     rrp;
16533   tRNAPtr       trp;
16534 
16535   if (sfp == NULL) return index;
16536 
16537   index += VisitSeqIdsInSeqLoc (sfp->location, userdata, callback);
16538   index += VisitSeqIdsInSeqLoc (sfp->product, userdata, callback);
16539 
16540   switch (sfp->data.choice) {
16541     case SEQFEAT_CDREGION :
16542       crp = (CdRegionPtr) sfp->data.value.ptrvalue;
16543       if (crp != NULL) {
16544         for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
16545           index += VisitSeqIdsInSeqLoc (cbp->loc, userdata, callback);
16546         }
16547       }
16548       break;
16549     case SEQFEAT_RNA :
16550       rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
16551       if (rrp != NULL && rrp->ext.choice == 2) {
16552         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
16553         if (trp != NULL && trp->anticodon != NULL) {
16554           index += VisitSeqIdsInSeqLoc (trp->anticodon, userdata, callback);
16555         }
16556       }
16557       break;
16558     default :
16559       break;
16560   }
16561 
16562   return index;
16563 }
16564 
VisitSeqIdsInSeqAlign(SeqAlignPtr sap,Pointer userdata,VisitSeqIdFunc callback)16565 NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback)
16566 
16567 {
16568   DenseDiagPtr  ddp;
16569   DenseSegPtr   dsp;
16570   Int4          index = 0;
16571   SeqIdPtr      sip;
16572   SeqLocPtr     slp = NULL;
16573   StdSegPtr     ssp;
16574 
16575   if (sap == NULL) return index;
16576 
16577   if (sap->bounds != NULL) {
16578     sip = SeqLocId (sap->bounds);
16579     index += VisitSeqIdList (sip, userdata, callback);
16580   }
16581 
16582   if (sap->segs == NULL) return index;
16583 
16584   switch (sap->segtype) {
16585     case SAS_DENDIAG :
16586       ddp = (DenseDiagPtr) sap->segs;
16587       if (ddp != NULL) {
16588         for (sip = ddp->id; sip != NULL; sip = sip->next) {
16589           index += VisitSeqIdList (sip, userdata, callback);
16590         }
16591       }
16592       break;
16593     case SAS_DENSEG :
16594       dsp = (DenseSegPtr) sap->segs;
16595       if (dsp != NULL) {
16596         for (sip = dsp->ids; sip != NULL; sip = sip->next) {
16597           index += VisitSeqIdList (sip, userdata, callback);
16598         }
16599       }
16600       break;
16601     case SAS_STD :
16602       ssp = (StdSegPtr) sap->segs;
16603       for (slp = ssp->loc; slp != NULL; slp = slp->next) {
16604         sip = SeqLocId (slp);
16605         index += VisitSeqIdList (sip, userdata, callback);
16606       }
16607       break;
16608     case SAS_DISC :
16609       /* recursive */
16610       for (sap = (SeqAlignPtr) sap->segs; sap != NULL; sap = sap->next) {
16611         index += VisitSeqIdsInSeqAlign (sap, userdata, callback);
16612       }
16613       break;
16614     default :
16615       break;
16616   }
16617 
16618   return index;
16619 }
16620 
VisitSeqIdsInSeqGraph(SeqGraphPtr sgp,Pointer userdata,VisitSeqIdFunc callback)16621 NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback)
16622 
16623 {
16624   Int4      index = 0;
16625   SeqIdPtr  sip;
16626 
16627   if (sgp == NULL) return index;
16628 
16629   if (sgp->loc != NULL) {
16630     sip = SeqLocId (sgp->loc);
16631     index += VisitSeqIdList (sip, userdata, callback);
16632   }
16633 
16634   return index;
16635 }
16636 
VisitSeqIdsInSeqAnnot(SeqAnnotPtr annot,Pointer userdata,VisitSeqIdFunc callback)16637 NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback)
16638 
16639 {
16640   Int4         index = 0;
16641   SeqAlignPtr  sap;
16642   SeqFeatPtr   sfp;
16643   SeqGraphPtr  sgp;
16644 
16645   if (annot == NULL || annot->data == NULL) return index;
16646 
16647   switch (annot->type) {
16648 
16649     case 1 :
16650       for (sfp = (SeqFeatPtr) annot->data; sfp != NULL; sfp = sfp->next) {
16651         index += VisitSeqIdsInSeqFeat (sfp, userdata, callback);
16652       }
16653       break;
16654 
16655     case 2 :
16656       for (sap = (SeqAlignPtr) annot->data; sap != NULL; sap = sap->next) {
16657         index += VisitSeqIdsInSeqAlign (sap, userdata, callback);
16658       }
16659       break;
16660 
16661     case 3 :
16662       for (sgp = (SeqGraphPtr) annot->data; sgp != NULL; sgp = sgp->next) {
16663         index += VisitSeqIdsInSeqGraph (sgp, userdata, callback);
16664       }
16665       break;
16666 
16667     default :
16668       break;
16669   }
16670 
16671   return index;
16672 }
16673 
VisitUserFieldsInUfp(UserFieldPtr ufp,Pointer userdata,VisitUserFieldsFunc callback)16674 NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback)
16675 
16676 {
16677   UserFieldPtr  curr;
16678   Int4          index = 0;
16679   Boolean       nested = FALSE;
16680 
16681   if (ufp == NULL) return index;
16682   if (ufp->choice == 11) {
16683     for (curr = (UserFieldPtr) ufp->data.ptrvalue; curr != NULL; curr = curr->next) {
16684       index += VisitUserFieldsInUfp (curr, userdata,callback);
16685       nested = TRUE;
16686     }
16687   }
16688   if (! nested) {
16689     if (callback != NULL) {
16690       callback (ufp, userdata);
16691     }
16692     index++;
16693   }
16694   return index;
16695 }
16696 
VisitUserFieldsInUop(UserObjectPtr uop,Pointer userdata,VisitUserFieldsFunc callback)16697 NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, VisitUserFieldsFunc callback)
16698 
16699 {
16700   Int4          index = 0;
16701   UserFieldPtr  ufp;
16702 
16703   if (uop == NULL) return index;
16704   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16705     if (callback != NULL) {
16706       callback (ufp, userdata);
16707     }
16708     index++;
16709   }
16710   return index;
16711 }
16712 
16713 /* Visits only unnested nodes */
VisitUserObjectsInUop(UserObjectPtr uop,Pointer userdata,VisitUserObjectFunc callback)16714 NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
16715 
16716 {
16717   Int4           index = 0;
16718   Boolean        nested = FALSE;
16719   UserObjectPtr  obj;
16720   UserFieldPtr   ufp;
16721 
16722   if (uop == NULL) return index;
16723   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16724     if (ufp->choice == 6) {
16725       obj = (UserObjectPtr) ufp->data.ptrvalue;
16726       index += VisitUserObjectsInUop (obj, userdata, callback);
16727       nested = TRUE;
16728     } else if (ufp->choice == 12) {
16729       for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) {
16730         index += VisitUserObjectsInUop (obj, userdata, callback);
16731       }
16732       nested = TRUE;
16733     }
16734   }
16735   if (! nested) {
16736     if (callback != NULL) {
16737       callback (uop, userdata);
16738     }
16739     index++;
16740   }
16741   return index;
16742 }
16743 
VisitAllUserObjectsInUop(UserObjectPtr uop,Pointer userdata,VisitUserObjectFunc callback)16744 NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
16745 
16746 {
16747   Int4           index = 0;
16748   UserObjectPtr  obj;
16749   UserFieldPtr   ufp;
16750 
16751   if (uop == NULL) return index;
16752   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16753     if (ufp->choice == 6) {
16754       obj = (UserObjectPtr) ufp->data.ptrvalue;
16755       index += VisitAllUserObjectsInUop (obj, userdata, callback);
16756     } else if (ufp->choice == 12) {
16757       for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) {
16758         index += VisitAllUserObjectsInUop (obj, userdata, callback);
16759       }
16760     }
16761   }
16762   if (callback != NULL) {
16763     callback (uop, userdata);
16764   }
16765   index++;
16766   return index;
16767 }
16768 //LCOV_EXCL_STOP
16769 
16770 typedef struct uopdata {
16771   UserObjectPtr  rsult;
16772   CharPtr        tag;
16773 } UopData, PNTR UopDataPtr;
16774 
/*
 * Visitor callback for FindUopByTag.  userdata points to a UopData; when
 * the type string of uop equals the requested tag (case-insensitive
 * comparison via StringICmp), uop is recorded as the result.  A later match
 * overwrites an earlier one.
 */
static void FindUopProc (
  UserObjectPtr uop,
  Pointer userdata
)

{
  UopDataPtr   search;
  ObjectIdPtr  typeId;

  if (uop == NULL || userdata == NULL) return;

  typeId = uop->type;
  if (typeId == NULL) return;

  search = (UopDataPtr) userdata;
  if (StringICmp (typeId->str, search->tag) == 0) {
    search->rsult = uop;
  }
}
16791 
FindUopByTag(UserObjectPtr top,CharPtr tag)16792 NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag)
16793 
16794 {
16795   UopData  ud;
16796 
16797   if (top == NULL || StringHasNoText (tag)) return NULL;
16798   ud.rsult = NULL;
16799   ud.tag = tag;
16800   VisitUserObjectsInUop (top, (Pointer) &ud, FindUopProc);
16801   return ud.rsult;
16802 }
16803 
16804 //LCOV_EXCL_START
CombineUserObjects(UserObjectPtr origuop,UserObjectPtr newuop)16805 NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop)
16806 
16807 {
16808   UserFieldPtr   prev = NULL;
16809   ObjectIdPtr    oip;
16810   UserFieldPtr   ufp;
16811   UserObjectPtr  uop;
16812 
16813   if (newuop == NULL) return origuop;
16814   if (origuop == NULL) return newuop;
16815 
16816   /* adding to an object that already chaperones at least two user objects */
16817 
16818   oip = origuop->type;
16819   if (oip != NULL && StringICmp (oip->str, "CombinedFeatureUserObjects") == 0) {
16820 
16821     for (ufp = origuop->data; ufp != NULL; ufp = ufp->next) {
16822       prev = ufp;
16823     }
16824 
16825     ufp = UserFieldNew ();
16826     oip = ObjectIdNew ();
16827     oip->id = 0;
16828     ufp->label = oip;
16829     ufp->choice = 6; /* user object */
16830     ufp->data.ptrvalue = (Pointer) newuop;
16831 
16832     /* link new set at end of list */
16833 
16834     if (prev != NULL) {
16835       prev->next = ufp;
16836     } else {
16837       origuop->data = ufp;
16838     }
16839     return origuop;
16840   }
16841 
16842   /* creating a new chaperone, link in first two user objects */
16843 
16844   uop = UserObjectNew ();
16845   oip = ObjectIdNew ();
16846   oip->str = StringSave ("CombinedFeatureUserObjects");
16847   uop->type = oip;
16848 
16849   ufp = UserFieldNew ();
16850   oip = ObjectIdNew ();
16851   oip->id = 0;
16852   ufp->label = oip;
16853   ufp->choice = 6; /* user object */
16854   ufp->data.ptrvalue = (Pointer) origuop;
16855   uop->data = ufp;
16856   prev = ufp;
16857 
16858   ufp = UserFieldNew ();
16859   oip = ObjectIdNew ();
16860   oip->id = 0;
16861   ufp->label = oip;
16862   ufp->choice = 6; /* user object */
16863   ufp->data.ptrvalue = (Pointer) newuop;
16864   prev->next = ufp;
16865 
16866   return uop;
16867 }
16868 
16869 
VisitDescriptorsProc(SeqDescrPtr descr,Pointer userdata,VisitDescriptorsFunc callback)16870 static Int4 VisitDescriptorsProc (SeqDescrPtr descr, Pointer userdata, VisitDescriptorsFunc callback)
16871 
16872 {
16873   Int4         index = 0;
16874   SeqDescrPtr  sdp;
16875 
16876   for (sdp = descr; sdp != NULL; sdp = sdp->next) {
16877     if (callback != NULL) {
16878       callback (sdp, userdata);
16879     }
16880     index++;
16881   }
16882   return index;
16883 }
16884 
VisitDescriptorsOnBsp(BioseqPtr bsp,Pointer userdata,VisitDescriptorsFunc callback)16885 NLM_EXTERN Int4 VisitDescriptorsOnBsp (BioseqPtr bsp, Pointer userdata, VisitDescriptorsFunc callback)
16886 
16887 {
16888   Int4  index = 0;
16889 
16890   if (bsp == NULL) return index;
16891   index += VisitDescriptorsProc (bsp->descr, userdata, callback);
16892   return index;
16893 }
16894 
VisitDescriptorsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitDescriptorsFunc callback)16895 NLM_EXTERN Int4 VisitDescriptorsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback)
16896 
16897 {
16898   Int4  index = 0;
16899 
16900   if (bssp == NULL) return index;
16901   index += VisitDescriptorsProc (bssp->descr, userdata, callback);
16902   return index;
16903 }
16904 
VisitDescriptorsInSet(BioseqSetPtr bssp,Pointer userdata,VisitDescriptorsFunc callback)16905 NLM_EXTERN Int4 VisitDescriptorsInSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback)
16906 
16907 {
16908   Int4         index = 0;
16909   SeqEntryPtr  tmp;
16910 
16911   if (bssp == NULL) return index;
16912   index += VisitDescriptorsProc (bssp->descr, userdata, callback);
16913   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
16914     index += VisitDescriptorsInSep (tmp, userdata, callback);
16915   }
16916   return index;
16917 }
16918 
VisitDescriptorsOnSep(SeqEntryPtr sep,Pointer userdata,VisitDescriptorsFunc callback)16919 NLM_EXTERN Int4 VisitDescriptorsOnSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback)
16920 
16921 {
16922   BioseqPtr     bsp;
16923   BioseqSetPtr  bssp;
16924   Int4          index = 0;
16925 
16926   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
16927   if (IS_Bioseq (sep)) {
16928     bsp = (BioseqPtr) sep->data.ptrvalue;
16929     index += VisitDescriptorsOnBsp (bsp, userdata, callback);
16930   } else if (IS_Bioseq_set (sep)) {
16931     bssp = (BioseqSetPtr) sep->data.ptrvalue;
16932     index += VisitDescriptorsOnSet (bssp, userdata, callback);
16933   }
16934   return index;
16935 }
16936 
VisitDescriptorsInSep(SeqEntryPtr sep,Pointer userdata,VisitDescriptorsFunc callback)16937 NLM_EXTERN Int4 VisitDescriptorsInSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback)
16938 
16939 {
16940   BioseqPtr     bsp;
16941   BioseqSetPtr  bssp;
16942   Int4          index = 0;
16943 
16944   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
16945   if (IS_Bioseq (sep)) {
16946     bsp = (BioseqPtr) sep->data.ptrvalue;
16947     index += VisitDescriptorsOnBsp (bsp, userdata, callback);
16948   } else if (IS_Bioseq_set (sep)) {
16949     bssp = (BioseqSetPtr) sep->data.ptrvalue;
16950     index += VisitDescriptorsInSet (bssp, userdata, callback);
16951   }
16952   return index;
16953 }
16954 
16955 
VisitFeaturesProc(SeqAnnotPtr annot,Pointer userdata,VisitFeaturesFunc callback)16956 static Int4 VisitFeaturesProc (SeqAnnotPtr annot, Pointer userdata, VisitFeaturesFunc callback)
16957 
16958 {
16959   Int4         index = 0;
16960   SeqAnnotPtr  sap;
16961   SeqFeatPtr   sfp;
16962 
16963   for (sap = annot; sap != NULL; sap = sap->next) {
16964     if (sap->type != 1) continue;
16965     for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
16966       if (callback != NULL) {
16967         callback (sfp, userdata);
16968       }
16969       index++;
16970     }
16971   }
16972   return index;
16973 }
16974 
VisitFeaturesOnSap(SeqAnnotPtr sap,Pointer userdata,VisitFeaturesFunc callback)16975 NLM_EXTERN Int4 VisitFeaturesOnSap (SeqAnnotPtr sap, Pointer userdata, VisitFeaturesFunc callback)
16976 
16977 {
16978   Int4        index = 0;
16979   SeqFeatPtr  sfp;
16980 
16981   if (sap == NULL) return index;
16982   if (sap->type != 1) return index;
16983   for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
16984     if (callback != NULL) {
16985       callback (sfp, userdata);
16986     }
16987     index++;
16988   }
16989   return index;
16990 }
16991 
VisitFeaturesOnBsp(BioseqPtr bsp,Pointer userdata,VisitFeaturesFunc callback)16992 NLM_EXTERN Int4 VisitFeaturesOnBsp (BioseqPtr bsp, Pointer userdata, VisitFeaturesFunc callback)
16993 
16994 {
16995   Int4  index = 0;
16996 
16997   if (bsp == NULL) return index;
16998   index += VisitFeaturesProc (bsp->annot, userdata, callback);
16999   return index;
17000 }
17001 
VisitFeaturesOnSet(BioseqSetPtr bssp,Pointer userdata,VisitFeaturesFunc callback)17002 NLM_EXTERN Int4 VisitFeaturesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback)
17003 
17004 {
17005   Int4  index = 0;
17006 
17007   if (bssp == NULL) return index;
17008   index += VisitFeaturesProc (bssp->annot, userdata, callback);
17009   return index;
17010 }
17011 
VisitFeaturesInSet(BioseqSetPtr bssp,Pointer userdata,VisitFeaturesFunc callback)17012 NLM_EXTERN Int4 VisitFeaturesInSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback)
17013 
17014 {
17015   Int4         index = 0;
17016   SeqEntryPtr  tmp;
17017 
17018   if (bssp == NULL) return index;
17019   index += VisitFeaturesProc (bssp->annot, userdata, callback);
17020   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17021     index += VisitFeaturesInSep (tmp, userdata, callback);
17022   }
17023   return index;
17024 }
17025 
VisitFeaturesOnSep(SeqEntryPtr sep,Pointer userdata,VisitFeaturesFunc callback)17026 NLM_EXTERN Int4 VisitFeaturesOnSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback)
17027 
17028 {
17029   BioseqPtr     bsp;
17030   BioseqSetPtr  bssp;
17031   Int4          index = 0;
17032 
17033   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17034   if (IS_Bioseq (sep)) {
17035     bsp = (BioseqPtr) sep->data.ptrvalue;
17036     index += VisitFeaturesOnBsp (bsp, userdata, callback);
17037   } else if (IS_Bioseq_set (sep)) {
17038     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17039     index += VisitFeaturesOnSet (bssp, userdata, callback);
17040   }
17041   return index;
17042 }
17043 
VisitFeaturesInSep(SeqEntryPtr sep,Pointer userdata,VisitFeaturesFunc callback)17044 NLM_EXTERN Int4 VisitFeaturesInSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback)
17045 
17046 {
17047   BioseqPtr     bsp;
17048   BioseqSetPtr  bssp;
17049   Int4          index = 0;
17050 
17051   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17052   if (IS_Bioseq (sep)) {
17053     bsp = (BioseqPtr) sep->data.ptrvalue;
17054     index += VisitFeaturesOnBsp (bsp, userdata, callback);
17055   } else if (IS_Bioseq_set (sep)) {
17056     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17057     index += VisitFeaturesInSet (bssp, userdata, callback);
17058   }
17059   return index;
17060 }
17061 
17062 
VisitAlignmentsOnDisc(Pointer segs,Pointer userdata,VisitAlignmentsFunc callback)17063 static Int4 VisitAlignmentsOnDisc (Pointer segs, Pointer userdata, VisitAlignmentsFunc callback)
17064 
17065 {
17066   Int4         index = 0;
17067   SeqAlignPtr  salp;
17068 
17069   for (salp = (SeqAlignPtr) segs; salp != NULL; salp = salp->next) {
17070     if (callback != NULL) {
17071       callback (salp, userdata);
17072     }
17073     index++;
17074     if (salp->segtype == SAS_DISC) {
17075       index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17076     }
17077   }
17078   return index;
17079 }
17080 
VisitAlignmentsProc(SeqAnnotPtr annot,Pointer userdata,VisitAlignmentsFunc callback)17081 static Int4 VisitAlignmentsProc (SeqAnnotPtr annot, Pointer userdata, VisitAlignmentsFunc callback)
17082 
17083 {
17084   Int4         index = 0;
17085   SeqAlignPtr  salp;
17086   SeqAnnotPtr  sap;
17087 
17088   for (sap = annot; sap != NULL; sap = sap->next) {
17089     if (sap->type != 2) continue;
17090     for (salp = (SeqAlignPtr) sap->data; salp != NULL; salp = salp->next) {
17091       if (callback != NULL) {
17092         callback (salp, userdata);
17093       }
17094       index++;
17095       if (salp->segtype == SAS_DISC) {
17096         index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17097       }
17098     }
17099   }
17100   return index;
17101 }
17102 
VisitAlignmentsOnSap(SeqAnnotPtr sap,Pointer userdata,VisitAlignmentsFunc callback)17103 NLM_EXTERN Int4 VisitAlignmentsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitAlignmentsFunc callback)
17104 
17105 {
17106   Int4         index = 0;
17107   SeqAlignPtr  salp;
17108 
17109   if (sap == NULL) return index;
17110   if (sap->type != 2) return index;
17111   for (salp = (SeqAlignPtr) sap->data; salp != NULL; salp = salp->next) {
17112     if (callback != NULL) {
17113       callback (salp, userdata);
17114     }
17115     index++;
17116     if (salp->segtype == SAS_DISC) {
17117       index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17118     }
17119   }
17120   return index;
17121 }
17122 
VisitAlignmentsOnBsp(BioseqPtr bsp,Pointer userdata,VisitAlignmentsFunc callback)17123 NLM_EXTERN Int4 VisitAlignmentsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAlignmentsFunc callback)
17124 
17125 {
17126   Int4  index = 0;
17127 
17128   if (bsp == NULL) return index;
17129   index += VisitAlignmentsProc (bsp->annot, userdata, callback);
17130   return index;
17131 }
17132 
VisitAlignmentsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitAlignmentsFunc callback)17133 NLM_EXTERN Int4 VisitAlignmentsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback)
17134 
17135 {
17136   Int4  index = 0;
17137 
17138   if (bssp == NULL) return index;
17139   index += VisitAlignmentsProc (bssp->annot, userdata, callback);
17140   return index;
17141 }
17142 
VisitAlignmentsInSet(BioseqSetPtr bssp,Pointer userdata,VisitAlignmentsFunc callback)17143 NLM_EXTERN Int4 VisitAlignmentsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback)
17144 
17145 {
17146   Int4         index = 0;
17147   SeqEntryPtr  tmp;
17148 
17149   if (bssp == NULL) return index;
17150   index += VisitAlignmentsProc (bssp->annot, userdata, callback);
17151   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17152     index += VisitAlignmentsInSep (tmp, userdata, callback);
17153   }
17154   return index;
17155 }
17156 
VisitAlignmentsOnSep(SeqEntryPtr sep,Pointer userdata,VisitAlignmentsFunc callback)17157 NLM_EXTERN Int4 VisitAlignmentsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback)
17158 
17159 {
17160   BioseqPtr     bsp;
17161   BioseqSetPtr  bssp;
17162   Int4          index = 0;
17163 
17164   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17165   if (IS_Bioseq (sep)) {
17166     bsp = (BioseqPtr) sep->data.ptrvalue;
17167     index += VisitAlignmentsOnBsp (bsp, userdata, callback);
17168   } else if (IS_Bioseq_set (sep)) {
17169     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17170     index += VisitAlignmentsOnSet (bssp, userdata, callback);
17171   }
17172   return index;
17173 }
17174 
VisitAlignmentsInSep(SeqEntryPtr sep,Pointer userdata,VisitAlignmentsFunc callback)17175 NLM_EXTERN Int4 VisitAlignmentsInSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback)
17176 
17177 {
17178   BioseqPtr     bsp;
17179   BioseqSetPtr  bssp;
17180   Int4          index = 0;
17181 
17182   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17183   if (IS_Bioseq (sep)) {
17184     bsp = (BioseqPtr) sep->data.ptrvalue;
17185     index += VisitAlignmentsOnBsp (bsp, userdata, callback);
17186   } else if (IS_Bioseq_set (sep)) {
17187     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17188     index += VisitAlignmentsInSet (bssp, userdata, callback);
17189   }
17190   return index;
17191 }
17192 
17193 
VisitGraphsProc(SeqAnnotPtr annot,Pointer userdata,VisitGraphsFunc callback)17194 static Int4 VisitGraphsProc (SeqAnnotPtr annot, Pointer userdata, VisitGraphsFunc callback)
17195 
17196 {
17197   Int4         index = 0;
17198   SeqAnnotPtr  sap;
17199   SeqGraphPtr  sgp;
17200 
17201   for (sap = annot; sap != NULL; sap = sap->next) {
17202     if (sap->type != 3) continue;
17203     for (sgp = (SeqGraphPtr) sap->data; sgp != NULL; sgp = sgp->next) {
17204       if (callback != NULL) {
17205         callback (sgp, userdata);
17206       }
17207       index++;
17208     }
17209   }
17210   return index;
17211 }
17212 
VisitGraphsOnSap(SeqAnnotPtr sap,Pointer userdata,VisitGraphsFunc callback)17213 NLM_EXTERN Int4 VisitGraphsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitGraphsFunc callback)
17214 
17215 {
17216   Int4         index = 0;
17217   SeqGraphPtr  sgp;
17218 
17219   if (sap == NULL) return index;
17220   if (sap->type != 3) return index;
17221   for (sgp = (SeqGraphPtr) sap->data; sgp != NULL; sgp = sgp->next) {
17222     if (callback != NULL) {
17223       callback (sgp, userdata);
17224     }
17225     index++;
17226   }
17227   return index;
17228 }
17229 
VisitGraphsOnBsp(BioseqPtr bsp,Pointer userdata,VisitGraphsFunc callback)17230 NLM_EXTERN Int4 VisitGraphsOnBsp (BioseqPtr bsp, Pointer userdata, VisitGraphsFunc callback)
17231 
17232 {
17233   Int4  index = 0;
17234 
17235   if (bsp == NULL) return index;
17236   index += VisitGraphsProc (bsp->annot, userdata, callback);
17237   return index;
17238 }
17239 
VisitGraphsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitGraphsFunc callback)17240 NLM_EXTERN Int4 VisitGraphsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback)
17241 
17242 {
17243   Int4  index = 0;
17244 
17245   if (bssp == NULL) return index;
17246   index += VisitGraphsProc (bssp->annot, userdata, callback);
17247   return index;
17248 }
17249 
VisitGraphsInSet(BioseqSetPtr bssp,Pointer userdata,VisitGraphsFunc callback)17250 NLM_EXTERN Int4 VisitGraphsInSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback)
17251 
17252 {
17253   Int4         index = 0;
17254   SeqEntryPtr  tmp;
17255 
17256   if (bssp == NULL) return index;
17257   index += VisitGraphsProc (bssp->annot, userdata, callback);
17258   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17259     index += VisitGraphsInSep (tmp, userdata, callback);
17260   }
17261   return index;
17262 }
17263 
VisitGraphsOnSep(SeqEntryPtr sep,Pointer userdata,VisitGraphsFunc callback)17264 NLM_EXTERN Int4 VisitGraphsOnSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback)
17265 
17266 {
17267   BioseqPtr     bsp;
17268   BioseqSetPtr  bssp;
17269   Int4          index = 0;
17270 
17271   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17272   if (IS_Bioseq (sep)) {
17273     bsp = (BioseqPtr) sep->data.ptrvalue;
17274     index += VisitGraphsOnBsp (bsp, userdata, callback);
17275   } else if (IS_Bioseq_set (sep)) {
17276     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17277     index += VisitGraphsOnSet (bssp, userdata, callback);
17278   }
17279   return index;
17280 }
17281 
VisitGraphsInSep(SeqEntryPtr sep,Pointer userdata,VisitGraphsFunc callback)17282 NLM_EXTERN Int4 VisitGraphsInSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback)
17283 
17284 {
17285   BioseqPtr     bsp;
17286   BioseqSetPtr  bssp;
17287   Int4          index = 0;
17288 
17289   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17290   if (IS_Bioseq (sep)) {
17291     bsp = (BioseqPtr) sep->data.ptrvalue;
17292     index += VisitGraphsOnBsp (bsp, userdata, callback);
17293   } else if (IS_Bioseq_set (sep)) {
17294     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17295     index += VisitGraphsInSet (bssp, userdata, callback);
17296   }
17297   return index;
17298 }
17299 
17300 
VisitAnnotsProc(SeqAnnotPtr annot,Pointer userdata,VisitAnnotsFunc callback)17301 static Int4 VisitAnnotsProc (SeqAnnotPtr annot, Pointer userdata, VisitAnnotsFunc callback)
17302 
17303 {
17304   Int4         index = 0;
17305   SeqAnnotPtr  sap;
17306 
17307   for (sap = annot; sap != NULL; sap = sap->next) {
17308     if (callback != NULL) {
17309       callback (sap, userdata);
17310     }
17311     index++;
17312   }
17313   return index;
17314 }
17315 
VisitAnnotsOnBsp(BioseqPtr bsp,Pointer userdata,VisitAnnotsFunc callback)17316 NLM_EXTERN Int4 VisitAnnotsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAnnotsFunc callback)
17317 
17318 {
17319   Int4  index = 0;
17320 
17321   if (bsp == NULL) return index;
17322   index += VisitAnnotsProc (bsp->annot, userdata, callback);
17323   return index;
17324 }
17325 
VisitAnnotsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitAnnotsFunc callback)17326 NLM_EXTERN Int4 VisitAnnotsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback)
17327 
17328 {
17329   Int4  index = 0;
17330 
17331   if (bssp == NULL) return index;
17332   index += VisitAnnotsProc (bssp->annot, userdata, callback);
17333   return index;
17334 }
17335 
VisitAnnotsInSet(BioseqSetPtr bssp,Pointer userdata,VisitAnnotsFunc callback)17336 NLM_EXTERN Int4 VisitAnnotsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback)
17337 
17338 {
17339   Int4         index = 0;
17340   SeqEntryPtr  tmp;
17341 
17342   if (bssp == NULL) return index;
17343   index += VisitAnnotsProc (bssp->annot, userdata, callback);
17344   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17345     index += VisitAnnotsInSep (tmp, userdata, callback);
17346   }
17347   return index;
17348 }
17349 
VisitAnnotsOnSep(SeqEntryPtr sep,Pointer userdata,VisitAnnotsFunc callback)17350 NLM_EXTERN Int4 VisitAnnotsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback)
17351 
17352 {
17353   BioseqPtr     bsp;
17354   BioseqSetPtr  bssp;
17355   Int4          index = 0;
17356 
17357   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17358   if (IS_Bioseq (sep)) {
17359     bsp = (BioseqPtr) sep->data.ptrvalue;
17360     index += VisitAnnotsOnBsp (bsp, userdata, callback);
17361   } else if (IS_Bioseq_set (sep)) {
17362     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17363     index += VisitAnnotsOnSet (bssp, userdata, callback);
17364   }
17365   return index;
17366 }
17367 
VisitAnnotsInSep(SeqEntryPtr sep,Pointer userdata,VisitAnnotsFunc callback)17368 NLM_EXTERN Int4 VisitAnnotsInSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback)
17369 
17370 {
17371   BioseqPtr     bsp;
17372   BioseqSetPtr  bssp;
17373   Int4          index = 0;
17374 
17375   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17376   if (IS_Bioseq (sep)) {
17377     bsp = (BioseqPtr) sep->data.ptrvalue;
17378     index += VisitAnnotsOnBsp (bsp, userdata, callback);
17379   } else if (IS_Bioseq_set (sep)) {
17380     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17381     index += VisitAnnotsInSet (bssp, userdata, callback);
17382   }
17383   return index;
17384 }
17385 
17386 
VisitAuthorsProc(AuthListPtr alp,Pointer userdata,VisitAuthorFunc callback)17387 static Int4 VisitAuthorsProc (AuthListPtr alp, Pointer userdata, VisitAuthorFunc callback)
17388 
17389 {
17390   AuthorPtr    ap;
17391   Int4         index = 0;
17392   ValNodePtr   names;
17393   NameStdPtr   nsp;
17394   PersonIdPtr  pid;
17395 
17396   if (alp == NULL || alp->choice != 1) return index;
17397 
17398   for (names = alp->names; names != NULL; names = names->next) {
17399     ap = names->data.ptrvalue;
17400     if (ap == NULL) continue;
17401     pid = ap->name;
17402     if (pid == NULL || pid->choice != 2) continue;
17403     nsp = pid->data;
17404     if (nsp == NULL) continue;
17405     if (callback != NULL) {
17406       callback (nsp, userdata);
17407     }
17408     index++;
17409   }
17410 
17411   return index;
17412 }
17413 
VisitAuthorsInPub(PubdescPtr pdp,Pointer userdata,VisitAuthorFunc callback)17414 NLM_EXTERN Int4 VisitAuthorsInPub (PubdescPtr pdp, Pointer userdata, VisitAuthorFunc callback)
17415 
17416 {
17417   CitArtPtr   cap;
17418   CitBookPtr  cbp;
17419   CitGenPtr   cgp;
17420   CitPatPtr   cpp;
17421   CitSubPtr   csp;
17422   Int4        index = 0;
17423   ValNodePtr  vnp;
17424 
17425   if (pdp == NULL) return index;
17426 
17427   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
17428     if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) continue;
17429     if (vnp->data.ptrvalue == NULL) continue;
17430     switch (vnp->choice) {
17431       case PUB_Gen :
17432         cgp = (CitGenPtr) vnp->data.ptrvalue;
17433         index += VisitAuthorsProc (cgp->authors, userdata, callback);
17434         break;
17435       case PUB_Sub :
17436         csp = (CitSubPtr) vnp->data.ptrvalue;
17437         index += VisitAuthorsProc (csp->authors, userdata, callback);
17438         break;
17439       case PUB_Article :
17440         cap = (CitArtPtr) vnp->data.ptrvalue;
17441         index += VisitAuthorsProc (cap->authors, userdata, callback);
17442         if (cap->from == 2 || cap->from == 3) {
17443           cbp = (CitBookPtr) cap->fromptr;
17444           if (cbp != NULL) {
17445             index += VisitAuthorsProc (cbp->authors, userdata, callback);
17446           }
17447         }
17448         break;
17449       case PUB_Book :
17450         cbp = (CitBookPtr) vnp->data.ptrvalue;
17451         index += VisitAuthorsProc (cbp->authors, userdata, callback);
17452         break;
17453       case PUB_Man :
17454         cbp = (CitBookPtr) vnp->data.ptrvalue;
17455         if (cbp->othertype == 2 && cbp->let_type == 3) {
17456           index += VisitAuthorsProc (cbp->authors, userdata, callback);
17457         }
17458         break;
17459       case PUB_Patent :
17460         cpp = (CitPatPtr) vnp->data.ptrvalue;
17461         index += VisitAuthorsProc (cpp->authors, userdata, callback);
17462         index += VisitAuthorsProc (cpp->applicants, userdata, callback);
17463         index += VisitAuthorsProc (cpp->assignees, userdata, callback);
17464         break;
17465       default :
17466         break;
17467     }
17468   }
17469 
17470   return index;
17471 }
17472 
17473 
VisitPubdescsProc(SeqDescrPtr descr,SeqAnnotPtr annot,Pointer userdata,VisitPubdescsFunc callback)17474 static Int4 VisitPubdescsProc (SeqDescrPtr descr, SeqAnnotPtr annot, Pointer userdata, VisitPubdescsFunc callback)
17475 
17476 {
17477   Int4         index = 0;
17478   PubdescPtr   pdp;
17479   SeqAnnotPtr  sap;
17480   SeqDescrPtr  sdp;
17481   SeqFeatPtr   sfp;
17482 
17483   for (sdp = descr; sdp != NULL; sdp = sdp->next) {
17484     if (sdp->choice == Seq_descr_pub) {
17485       pdp = (PubdescPtr) sdp->data.ptrvalue;
17486       if (pdp != NULL) {
17487         if (callback != NULL) {
17488           callback (pdp, userdata);
17489         }
17490         index++;
17491       }
17492     }
17493   }
17494   for (sap = annot; sap != NULL; sap = sap->next) {
17495     if (sap->type != 1) continue;
17496     for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
17497       if (sfp->data.choice == SEQFEAT_PUB) {
17498         pdp = (PubdescPtr) sfp->data.value.ptrvalue;
17499         if (pdp != NULL) {
17500           if (callback != NULL) {
17501             callback (pdp, userdata);
17502           }
17503           index++;
17504         }
17505       }
17506     }
17507   }
17508   return index;
17509 }
17510 
VisitPubdescsOnBsp(BioseqPtr bsp,Pointer userdata,VisitPubdescsFunc callback)17511 NLM_EXTERN Int4 VisitPubdescsOnBsp (BioseqPtr bsp, Pointer userdata, VisitPubdescsFunc callback)
17512 
17513 {
17514   Int4  index = 0;
17515 
17516   if (bsp == NULL) return index;
17517   index += VisitPubdescsProc (bsp->descr, bsp->annot, userdata, callback);
17518   return index;
17519 }
17520 
VisitPubdescsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitPubdescsFunc callback)17521 NLM_EXTERN Int4 VisitPubdescsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback)
17522 
17523 {
17524   Int4  index = 0;
17525 
17526   if (bssp == NULL) return index;
17527   index += VisitPubdescsProc (bssp->descr, bssp->annot, userdata, callback);
17528   return index;
17529 }
17530 
VisitPubdescsInSet(BioseqSetPtr bssp,Pointer userdata,VisitPubdescsFunc callback)17531 NLM_EXTERN Int4 VisitPubdescsInSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback)
17532 
17533 {
17534   Int4         index = 0;
17535   SeqEntryPtr  tmp;
17536 
17537   if (bssp == NULL) return index;
17538   index += VisitPubdescsProc (bssp->descr, bssp->annot, userdata, callback);
17539   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17540     index += VisitPubdescsInSep (tmp, userdata, callback);
17541   }
17542   return index;
17543 }
17544 
VisitPubdescsOnSep(SeqEntryPtr sep,Pointer userdata,VisitPubdescsFunc callback)17545 NLM_EXTERN Int4 VisitPubdescsOnSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback)
17546 
17547 {
17548   BioseqPtr     bsp;
17549   BioseqSetPtr  bssp;
17550   Int4          index = 0;
17551 
17552   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17553   if (IS_Bioseq (sep)) {
17554     bsp = (BioseqPtr) sep->data.ptrvalue;
17555     index += VisitPubdescsOnBsp (bsp, userdata, callback);
17556   } else if (IS_Bioseq_set (sep)) {
17557     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17558     index += VisitPubdescsOnSet (bssp, userdata, callback);
17559   }
17560   return index;
17561 }
17562 
VisitPubdescsInSep(SeqEntryPtr sep,Pointer userdata,VisitPubdescsFunc callback)17563 NLM_EXTERN Int4 VisitPubdescsInSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback)
17564 
17565 {
17566   BioseqPtr     bsp;
17567   BioseqSetPtr  bssp;
17568   Int4          index = 0;
17569 
17570   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17571   if (IS_Bioseq (sep)) {
17572     bsp = (BioseqPtr) sep->data.ptrvalue;
17573     index += VisitPubdescsOnBsp (bsp, userdata, callback);
17574   } else if (IS_Bioseq_set (sep)) {
17575     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17576     index += VisitPubdescsInSet (bssp, userdata, callback);
17577   }
17578   return index;
17579 }
17580 
17581 
VisitBioSourcesProc(SeqDescrPtr descr,SeqAnnotPtr annot,Pointer userdata,VisitBioSourcesFunc callback)17582 static Int4 VisitBioSourcesProc (SeqDescrPtr descr, SeqAnnotPtr annot, Pointer userdata, VisitBioSourcesFunc callback)
17583 
17584 {
17585   BioSourcePtr  biop;
17586   Int4          index = 0;
17587   SeqAnnotPtr   sap;
17588   SeqDescrPtr   sdp;
17589   SeqFeatPtr    sfp;
17590 
17591   for (sdp = descr; sdp != NULL; sdp = sdp->next) {
17592     if (sdp->choice == Seq_descr_source) {
17593       biop = (BioSourcePtr) sdp->data.ptrvalue;
17594       if (biop != NULL) {
17595         if (callback != NULL) {
17596           callback (biop, userdata);
17597         }
17598         index++;
17599       }
17600     }
17601   }
17602   for (sap = annot; sap != NULL; sap = sap->next) {
17603     if (sap->type != 1) continue;
17604     for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
17605       if (sfp->data.choice == SEQFEAT_BIOSRC) {
17606         biop = (BioSourcePtr) sfp->data.value.ptrvalue;
17607         if (biop != NULL) {
17608           if (callback != NULL) {
17609             callback (biop, userdata);
17610           }
17611           index++;
17612         }
17613       }
17614     }
17615   }
17616   return index;
17617 }
17618 
VisitBioSourcesOnBsp(BioseqPtr bsp,Pointer userdata,VisitBioSourcesFunc callback)17619 NLM_EXTERN Int4 VisitBioSourcesOnBsp (BioseqPtr bsp, Pointer userdata, VisitBioSourcesFunc callback)
17620 
17621 {
17622   Int4  index = 0;
17623 
17624   if (bsp == NULL) return index;
17625   index += VisitBioSourcesProc (bsp->descr, bsp->annot, userdata, callback);
17626   return index;
17627 }
17628 
VisitBioSourcesOnSet(BioseqSetPtr bssp,Pointer userdata,VisitBioSourcesFunc callback)17629 NLM_EXTERN Int4 VisitBioSourcesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback)
17630 
17631 {
17632   Int4  index = 0;
17633 
17634   if (bssp == NULL) return index;
17635   index += VisitBioSourcesProc (bssp->descr, bssp->annot, userdata, callback);
17636   return index;
17637 }
17638 
VisitBioSourcesInSet(BioseqSetPtr bssp,Pointer userdata,VisitBioSourcesFunc callback)17639 NLM_EXTERN Int4 VisitBioSourcesInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback)
17640 
17641 {
17642   Int4         index = 0;
17643   SeqEntryPtr  tmp;
17644 
17645   if (bssp == NULL) return index;
17646   index += VisitBioSourcesProc (bssp->descr, bssp->annot, userdata, callback);
17647   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17648     index += VisitBioSourcesInSep (tmp, userdata, callback);
17649   }
17650   return index;
17651 }
17652 
VisitBioSourcesOnSep(SeqEntryPtr sep,Pointer userdata,VisitBioSourcesFunc callback)17653 NLM_EXTERN Int4 VisitBioSourcesOnSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback)
17654 
17655 {
17656   BioseqPtr     bsp;
17657   BioseqSetPtr  bssp;
17658   Int4          index = 0;
17659 
17660   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17661   if (IS_Bioseq (sep)) {
17662     bsp = (BioseqPtr) sep->data.ptrvalue;
17663     index += VisitBioSourcesOnBsp (bsp, userdata, callback);
17664   } else if (IS_Bioseq_set (sep)) {
17665     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17666     index += VisitBioSourcesOnSet (bssp, userdata, callback);
17667   }
17668   return index;
17669 }
17670 
VisitBioSourcesInSep(SeqEntryPtr sep,Pointer userdata,VisitBioSourcesFunc callback)17671 NLM_EXTERN Int4 VisitBioSourcesInSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback)
17672 
17673 {
17674   BioseqPtr     bsp;
17675   BioseqSetPtr  bssp;
17676   Int4          index = 0;
17677 
17678   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17679   if (IS_Bioseq (sep)) {
17680     bsp = (BioseqPtr) sep->data.ptrvalue;
17681     index += VisitBioSourcesOnBsp (bsp, userdata, callback);
17682   } else if (IS_Bioseq_set (sep)) {
17683     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17684     index += VisitBioSourcesInSet (bssp, userdata, callback);
17685   }
17686   return index;
17687 }
17688 
17689 
VisitBioseqsInSet(BioseqSetPtr bssp,Pointer userdata,VisitBioseqsFunc callback)17690 NLM_EXTERN Int4 VisitBioseqsInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioseqsFunc callback)
17691 
17692 {
17693   Int4         index = 0;
17694   SeqEntryPtr  tmp;
17695 
17696   if (bssp == NULL) return index;
17697   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17698     index += VisitBioseqsInSep (tmp, userdata, callback);
17699   }
17700   return index;
17701 }
17702 
VisitBioseqsInSep(SeqEntryPtr sep,Pointer userdata,VisitBioseqsFunc callback)17703 NLM_EXTERN Int4 VisitBioseqsInSep (SeqEntryPtr sep, Pointer userdata, VisitBioseqsFunc callback)
17704 
17705 {
17706   BioseqPtr     bsp;
17707   BioseqSetPtr  bssp;
17708   Int4          index = 0;
17709 
17710   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17711   if (IS_Bioseq (sep)) {
17712     bsp = (BioseqPtr) sep->data.ptrvalue;
17713     if (callback != NULL) {
17714       callback (bsp, userdata);
17715     }
17716     index++;
17717   } else if (IS_Bioseq_set (sep)) {
17718     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17719     index += VisitBioseqsInSet (bssp, userdata, callback);
17720   }
17721   return index;
17722 }
17723 
VisitSequencesInSet(BioseqSetPtr bssp,Pointer userdata,Int2 filter,VisitSequencesFunc callback)17724 NLM_EXTERN Int4 VisitSequencesInSet (BioseqSetPtr bssp, Pointer userdata, Int2 filter, VisitSequencesFunc callback)
17725 
17726 {
17727   Int4         index = 0;
17728   SeqEntryPtr  tmp;
17729 
17730   if (bssp == NULL) return index;
17731   if (bssp->_class == BioseqseqSet_class_parts) {
17732     if (filter != VISIT_PARTS) return index;
17733     filter = VISIT_MAINS;
17734   }
17735   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17736     index += VisitSequencesInSep (tmp, userdata, filter, callback);
17737   }
17738   return index;
17739 }
17740 
VisitSequencesInSep(SeqEntryPtr sep,Pointer userdata,Int2 filter,VisitSequencesFunc callback)17741 NLM_EXTERN Int4 VisitSequencesInSep (SeqEntryPtr sep, Pointer userdata, Int2 filter, VisitSequencesFunc callback)
17742 
17743 {
17744   BioseqPtr     bsp;
17745   BioseqSetPtr  bssp;
17746   Int4          index = 0;
17747 
17748   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17749   if (IS_Bioseq (sep)) {
17750     bsp = (BioseqPtr) sep->data.ptrvalue;
17751     if (filter == VISIT_MAINS ||
17752         (filter == VISIT_NUCS && ISA_na (bsp->mol)) ||
17753         (filter == VISIT_PROTS && ISA_aa (bsp->mol))) {
17754       if (callback != NULL) {
17755         callback (bsp, userdata);
17756       }
17757       index++;
17758     }
17759   } else if (IS_Bioseq_set (sep)) {
17760     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17761     index += VisitSequencesInSet (bssp, userdata, filter, callback);
17762   }
17763   return index;
17764 }
17765 
VisitSetsInSet(BioseqSetPtr bssp,Pointer userdata,VisitSetsFunc callback)17766 NLM_EXTERN Int4 VisitSetsInSet (BioseqSetPtr bssp, Pointer userdata, VisitSetsFunc callback)
17767 
17768 {
17769   Int4         index = 0;
17770   SeqEntryPtr  tmp;
17771 
17772   if (bssp == NULL) return index;
17773   if (callback != NULL) {
17774     callback (bssp, userdata);
17775   }
17776   index++;
17777   for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17778     index += VisitSetsInSep (tmp, userdata, callback);
17779   }
17780   return index;
17781 }
17782 
VisitSetsInSep(SeqEntryPtr sep,Pointer userdata,VisitSetsFunc callback)17783 NLM_EXTERN Int4 VisitSetsInSep (SeqEntryPtr sep, Pointer userdata, VisitSetsFunc callback)
17784 
17785 {
17786   BioseqSetPtr  bssp;
17787   Int4          index = 0;
17788 
17789   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17790   if (IS_Bioseq_set (sep)) {
17791     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17792     index += VisitSetsInSet (bssp, userdata, callback);
17793   }
17794   return index;
17795 }
17796 
VisitElementsInSep(SeqEntryPtr sep,Pointer userdata,VisitElementsFunc callback)17797 NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElementsFunc callback)
17798 
17799 {
17800   BioseqSetPtr  bssp;
17801   Int4          index = 0;
17802   SeqEntryPtr   tmp;
17803 
17804   if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17805   if (IS_Bioseq_set (sep)) {
17806     bssp = (BioseqSetPtr) sep->data.ptrvalue;
17807     if (bssp == NULL) return index;
17808     if (bssp->_class == 7 ||
17809         (bssp->_class >= 13 && bssp->_class <= 16) ||
17810         bssp->_class == BioseqseqSet_class_wgs_set ||
17811         bssp->_class == BioseqseqSet_class_gen_prod_set ||
17812         bssp->_class == BioseqseqSet_class_small_genome_set) {
17813       for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17814         index += VisitElementsInSep (tmp, userdata, callback);
17815       }
17816       return index;
17817     }
17818   }
17819   if (callback != NULL) {
17820     callback (sep, userdata);
17821   }
17822   index++;
17823   return index;
17824 }
17825 
IsPopPhyEtcSet(Uint1 _class)17826 NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class)
17827 
17828 {
17829   if (_class == BioseqseqSet_class_mut_set ||
17830       _class == BioseqseqSet_class_pop_set ||
17831       _class == BioseqseqSet_class_phy_set ||
17832       _class == BioseqseqSet_class_eco_set ||
17833       _class == BioseqseqSet_class_wgs_set ||
17834       _class == BioseqseqSet_class_small_genome_set) return TRUE;
17835   return FALSE;
17836 }
17837 
17838 
CleanupStringsForOneDescriptor(SeqDescPtr sdp,SeqEntryPtr sep)17839 NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep)
17840 {
17841   Boolean stripSerial = FALSE;
17842   Boolean isEmblOrDdbj = FALSE;
17843 
17844   if (sdp == NULL) {
17845     return;
17846   }
17847   SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
17848   SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
17849 
17850   if (sdp->choice == Seq_descr_pub) {
17851     FlattenPubdesc (sdp->data.ptrvalue, NULL);
17852   }
17853 
17854   CleanupDescriptorStrings (sdp, stripSerial, TRUE, NULL, isEmblOrDdbj);
17855 }
17856 
17857 
CleanupOneSeqFeat(SeqFeatPtr sfp)17858 NLM_EXTERN void CleanupOneSeqFeat (SeqFeatPtr sfp)
17859 {
17860   Boolean           isEmblOrDdbj = FALSE;
17861   Boolean           isJscan = FALSE;
17862   Boolean           stripSerial = TRUE;
17863   ValNodePtr        publist = NULL;
17864   SeqEntryPtr       sep;
17865 
17866   if (sfp->idx.entityID == 0) {
17867     return;
17868   }
17869   sep = GetTopSeqEntryForEntityID (sfp->idx.entityID);
17870 
17871   SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
17872   SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
17873   SeqEntryExplore (sep, (Pointer) &isJscan, CheckForJournalScanID);
17874   FlattenSfpCit (sfp, NULL);
17875   CleanUpSeqFeat (sfp, isEmblOrDdbj, isJscan, stripSerial, TRUE, &publist);
17876 
17877   if (publist != NULL) {
17878    ChangeCitsOnFeats (sfp, publist);
17879   }
17880   ValNodeFreeData (publist);
17881 }
17882 //LCOV_EXCL_STOP
17883 
RemoveFeatureLink(SeqFeatPtr sfp1,SeqFeatPtr sfp2)17884 NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2)
17885 {
17886   SeqFeatXrefPtr  xref, next, PNTR prevlink;
17887   ObjectIdPtr     oip;
17888   SeqFeatPtr      link_sfp;
17889   Char            buf [32];
17890   CharPtr         str = NULL;
17891 
17892   if (sfp1 == NULL) return;
17893 
17894   prevlink = (SeqFeatXrefPtr PNTR) &(sfp1->xref);
17895   xref = sfp1->xref;
17896   while (xref != NULL) {
17897     next = xref->next;
17898     link_sfp = NULL;
17899 
17900     if (xref->id.choice == 3) {
17901       oip = (ObjectIdPtr) xref->id.value.ptrvalue;
17902       if (oip != NULL) {
17903         if (StringDoesHaveText (oip->str)) {
17904           str = oip->str;
17905         } else {
17906           sprintf (buf, "%ld", (long) oip->id);
17907           str = buf;
17908         }
17909         link_sfp = SeqMgrGetFeatureByFeatID (sfp1->idx.entityID, NULL, str, NULL, NULL);
17910       }
17911     }
17912     if (link_sfp == sfp2) {
17913       *prevlink = xref->next;
17914       xref->next = NULL;
17915       MemFree (xref);
17916     } else {
17917       prevlink = (SeqFeatXrefPtr PNTR) &(xref->next);
17918     }
17919 
17920     xref = next;
17921   }
17922 }
17923 
17924 
LinkTwoFeatures(SeqFeatPtr dst,SeqFeatPtr sfp)17925 NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp)
17926 
17927 {
17928   ChoicePtr       cp;
17929   ObjectIdPtr     oip;
17930   SeqFeatXrefPtr  xref, prev_xref, next_xref;
17931   SeqFeatPtr      old_match;
17932 
17933   if (dst == NULL || sfp == NULL) return;
17934 
17935   cp = &(dst->id);
17936   if (cp == NULL) return;
17937   if (cp->choice == 3) {
17938     /* don't create a duplicate xref, remove links to other features */
17939     xref = sfp->xref;
17940     prev_xref = NULL;
17941     while (xref != NULL) {
17942       next_xref = xref->next;
17943       if (xref->id.choice == 3 && xref->id.value.ptrvalue != NULL) {
17944         if (ObjectIdMatch (cp->value.ptrvalue, xref->id.value.ptrvalue)) {
17945           /* already have this xref */
17946           return;
17947         } else {
17948           old_match = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
17949           RemoveFeatureLink (sfp, old_match);
17950           RemoveFeatureLink (old_match, sfp);
17951         }
17952       } else {
17953         prev_xref = xref;
17954       }
17955       xref = next_xref;
17956     }
17957 
17958     oip = (ObjectIdPtr) cp->value.ptrvalue;
17959     if (oip != NULL) {
17960       oip = AsnIoMemCopy (oip, (AsnReadFunc) ObjectIdAsnRead,
17961                           (AsnWriteFunc) ObjectIdAsnWrite);
17962       if (oip != NULL) {
17963         xref = SeqFeatXrefNew ();
17964         if (xref != NULL) {
17965           xref->id.choice = 3;
17966           xref->id.value.ptrvalue = (Pointer) oip;
17967           xref->next = sfp->xref;
17968           sfp->xref = xref;
17969         }
17970       }
17971     }
17972   }
17973 }
17974 
17975 /* basic cleanup code from sqnutil3.c */
17976 
/*
 * ConvertSourceFeatDescProc
 *
 * Feature callback that turns a full-length BioSource feature into a
 * source descriptor on the Bioseq it is located on.
 *
 *   sfp      - candidate feature; ignored unless it is a BioSource feature
 *              whose location equals the whole Bioseq
 *   userdata - unused
 *
 * On conversion the BioSource pointer is moved (not copied) from the
 * feature to a newly created Seq_descr_source descriptor, and the feature
 * is flagged with idx.deleteme.  Two further pieces of feature data are
 * then transferred to the BioSource, each independently of the other:
 *   - the feature comment becomes a SUBSRC_other subsource appended at the
 *     end of the subtype list;
 *   - the feature dbxrefs are appended to the org db list, creating the
 *     OrgRef if needed.
 * The dbxrefs are moved whether or not the feature has a comment, since
 * the feature that held them is about to be deleted.
 */
extern void ConvertSourceFeatDescProc (SeqFeatPtr sfp, Pointer userdata)

{
  BioSourcePtr   biop;
  BioseqPtr      bsp;
  SubSourcePtr   lastssp;
  ObjValNodePtr  ovp;
  SeqDescPtr     sdp;
  SeqEntryPtr    sep;
  SeqIdPtr       sip;
  SubSourcePtr   ssp;
  ValNode        vn;
  ValNodePtr     last_dbxref;

  /* look for biosource features */
  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return;
  /* get bioseq by feature location */
  sip = SeqLocId (sfp->location);
  bsp = BioseqFind (sip);
  if (bsp == NULL) return;
  sip = SeqIdFindBest (bsp->id, 0);
  if (sip == NULL) return;
  /* stack-built whole-sequence location used only for the comparison */
  vn.choice = SEQLOC_WHOLE;
  vn.extended = 0;
  vn.data.ptrvalue = (Pointer) sip;
  vn.next = NULL;
  /* is feature full length? */
  if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return;
  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) return;
  sdp = CreateNewDescriptor (sep, Seq_descr_source);
  if (sdp == NULL) return;
  /* move biosource from feature to descriptor */
  sdp->data.ptrvalue = sfp->data.value.ptrvalue;
  if (sdp->extended != 0) {
    ovp = (ObjValNodePtr) sdp;
    ovp->idx.subtype = Seq_descr_source;
  }
  sfp->data.value.ptrvalue = NULL;
  /* flag old feature for removal */
  sfp->idx.deleteme = TRUE;

  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop == NULL) return;

  /* move comment to subsource note */
  if (sfp->comment != NULL) {
    ssp = SubSourceNew ();
    if (ssp != NULL) {
      ssp->subtype = SUBSRC_other;
      ssp->name = sfp->comment;
      sfp->comment = NULL;
      /* link in at end, since BasicSeqEntry will have sorted this list */
      if (biop->subtype == NULL) {
        biop->subtype = ssp;
      } else {
        lastssp = biop->subtype;
        while (lastssp->next != NULL) {
          lastssp = lastssp->next;
        }
        lastssp->next = ssp;
      }
    }
  }

  /* move dbxrefs on feature to source */
  if (sfp->dbxref != NULL) {
    if (biop->org == NULL) {
      biop->org = OrgRefNew ();
    }
    /* leave the dbxrefs on the feature if no OrgRef could be created */
    if (biop->org != NULL) {
      last_dbxref = biop->org->db;
      while (last_dbxref != NULL && last_dbxref->next != NULL) {
        last_dbxref = last_dbxref->next;
      }
      if (last_dbxref == NULL) {
        biop->org->db = sfp->dbxref;
      } else {
        last_dbxref->next = sfp->dbxref;
      }
      sfp->dbxref = NULL;
    }
  }
}
18055 
/*
 * ExtendSingleGeneOnMRNA
 *
 * Bioseq callback.  On an mRNA Bioseq that carries exactly one gene, the
 * gene location is replaced by a single interval spanning the whole
 * sequence (0 .. length - 1) on the gene's strand, keeping the original
 * 5' and 3' partial flags.
 *
 *   bsp      - Bioseq to examine
 *   userdata - unused
 *
 * Nothing is changed when any of the following holds:
 *   - bsp is NULL, has zero length, or is not nucleic acid;
 *   - there is no molinfo descriptor with biomol MOLECULE_TYPE_MRNA;
 *   - the source descriptor has origin ORG_ARTIFICIAL and taxname
 *     "synthetic construct" (compared case-insensitively);
 *   - more than one gene or more than one mRNA feature is indexed on bsp;
 *   - more than one coding region is indexed and the parent is not a
 *     segmented set;
 *   - the gene location is not a single interval and bsp has an EMBL or
 *     DDBJ Seq-id;
 *   - there is no gene, or the gene location does not resolve to bsp.
 *
 * When the parent BioseqSet is a segset, the extended gene is afterwards
 * passed to MergeFeatureIntervalsToParts together with whether the old
 * location had NULLs between its intervals.
 */
extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)

{
  BioSourcePtr       biop;
  BioseqSetPtr       bssp;
  Int4               cds_count = 0;
  SeqMgrFeatContext  fcontext;
  SeqFeatPtr         feat;
  SeqFeatPtr         gene = NULL;
  Boolean            has_nulls;
  Boolean            in_segset = FALSE;
  MolInfoPtr         mip;
  Int4               mrna_count = 0;
  SeqLocPtr          new_loc;
  Boolean            partial5, partial3;
  SeqDescrPtr        sdp;
  SeqIdPtr           sip;

  if (bsp == NULL) return;
  if (bsp->length == 0) return;
  if (!ISA_na (bsp->mol)) return;

  /* must be flagged as mRNA by its molinfo descriptor */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
  if (sdp == NULL) return;
  mip = (MolInfoPtr) sdp->data.ptrvalue;
  if (mip == NULL || mip->biomol != MOLECULE_TYPE_MRNA) return;

  /* artificial "synthetic construct" sources are left alone */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
  biop = (sdp != NULL) ? (BioSourcePtr) sdp->data.ptrvalue : NULL;
  if (biop != NULL && biop->origin == ORG_ARTIFICIAL && biop->org != NULL) {
    if (StringICmp (biop->org->taxname, "synthetic construct") == 0) return;
  }

  if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
    if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
      in_segset = TRUE;
    }
  }

  /* take inventory of genes, coding regions and mRNAs on this sequence */
  feat = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
  while (feat != NULL) {
    if (feat->data.choice == SEQFEAT_GENE) {
      /* a second gene disqualifies the sequence */
      if (gene != NULL) return;
      gene = feat;
    } else if (feat->data.choice == SEQFEAT_CDREGION) {
      cds_count++;
      /* a second coding region disqualifies it unless in a segset */
      if (cds_count > 1 && !in_segset) return;
    } else if (feat->idx.subtype == FEATDEF_mRNA) {
      mrna_count++;
      /* a second mRNA disqualifies the sequence */
      if (mrna_count > 1) return;
    }
    feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
  }

  if (gene == NULL) return;

  /* skip this sequence if it is multi-interval and EMBL or DDBJ */
  if (gene->location != NULL && gene->location->choice != SEQLOC_INT) {
    for (sip = bsp->id; sip != NULL; sip = sip->next) {
      if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return;
    }
  }

  if (BioseqFindFromSeqLoc (gene->location) != bsp) return;

  /* capture properties of the old location before it is freed */
  CheckSeqLocForPartial (gene->location, &partial5, &partial3);
  has_nulls = LocationHasNullsBetween (gene->location);

  /* gene should cover entire length of sequence */
  new_loc = SeqLocIntNew (0, bsp->length - 1, SeqLocStrand (gene->location), SeqIdFindBest (bsp->id, 0));
  SetSeqLocPartial (new_loc, partial5, partial3);
  SeqLocFree (gene->location);
  gene->location = new_loc;

  if (in_segset) {
    MergeFeatureIntervalsToParts (gene, has_nulls);
  }
}
18156 
18157 //LCOV_EXCL_START
DbtagParse(CharPtr str)18158 static DbtagPtr DbtagParse (
18159   CharPtr str
18160 )
18161 
18162 {
18163   Boolean      all_digits = TRUE;
18164   Char         ch;
18165   DbtagPtr     dbt;
18166   long         num;
18167   Int2         num_digits = 0;
18168   ObjectIdPtr  oip;
18169   CharPtr      ptr;
18170   CharPtr      tmp;
18171 
18172   if (StringHasNoText (str)) return NULL;
18173   ptr = StringChr (str, ':');
18174   if (ptr == NULL) return NULL;
18175 
18176   dbt = DbtagNew ();
18177   oip = ObjectIdNew ();
18178   if (dbt == NULL || oip == NULL) return NULL;
18179 
18180   if (ptr != NULL) {
18181     *ptr = '\0';
18182     ptr++;
18183   }
18184 
18185   dbt->db = StringSave (str);
18186   dbt->tag = oip;
18187 
18188   tmp = ptr;
18189   ch = *tmp;
18190   while (ch != '\0') {
18191     if (IS_DIGIT (ch)) {
18192       num_digits++;
18193     } else {
18194       all_digits = FALSE;
18195     }
18196     tmp++;
18197     ch = *tmp;
18198   }
18199 
18200   if (all_digits && *ptr != '0') {
18201     if (num_digits < 10 || (num_digits == 10 && StringCmp (ptr, "2147483647") <= 0)) {
18202       sscanf (ptr, "%ld", &num);
18203       oip->id = (Int4) num;
18204       return dbt;
18205     }
18206   }
18207 
18208   oip->str = StringSave (ptr);
18209 
18210   return dbt;
18211 }
18212 //LCOV_EXCL_STOP
18213 
/*
 * GetNomenclatureUOP
 *
 * User-object callback.  When uop's type string is exactly
 * "OfficialNomenclature", stores uop through userdata, which is
 * interpreted as a pointer to a UserObjectPtr.  Does nothing when either
 * argument is NULL, when uop has no type, or when the type differs.
 */
static void GetNomenclatureUOP (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  type;

  if (uop != NULL && userdata != NULL) {
    type = uop->type;
    if (type != NULL && StringCmp (type->str, "OfficialNomenclature") == 0) {
      *((UserObjectPtr PNTR) userdata) = uop;
    }
  }
}
18230 
18231 
18232 //LCOV_EXCL_START
ModernizeGeneFields(SeqFeatPtr sfp)18233 NLM_EXTERN void ModernizeGeneFields (
18234   SeqFeatPtr sfp
18235 )
18236 
18237 {
18238   GeneNomenclaturePtr  gnp;
18239   GeneRefPtr           grp;
18240   ObjectIdPtr          oip;
18241   CharPtr              str;
18242   CharPtr              symbol = NULL, name = NULL, source = NULL;
18243   Uint2                status = 0;
18244   UserFieldPtr         ufp;
18245   UserObjectPtr        uop = NULL;
18246   UserObjectPtr        curr, next;
18247   UserObjectPtr PNTR   prev;
18248 
18249   if (sfp == NULL) return;
18250   if (sfp->data.choice != SEQFEAT_GENE) return;
18251 
18252   grp = (GeneRefPtr) sfp->data.value.ptrvalue;
18253   if (grp == NULL) return;
18254 
18255   if (grp->formal_name != NULL) return;
18256 
18257   if (sfp->ext == NULL) return;
18258   VisitUserObjectsInUop (sfp->ext, (Pointer) &uop, GetNomenclatureUOP);
18259   if (uop == NULL) return;
18260 
18261   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
18262     oip = ufp->label;
18263     if (oip == NULL || oip->str == NULL) continue;
18264     if (StringICmp (oip->str, "Symbol") == 0) {
18265       if (ufp->choice == 1) {
18266         str = (CharPtr) ufp->data.ptrvalue;
18267         if (str != NULL) {
18268           symbol = str;
18269         }
18270       }
18271     } else if (StringICmp (oip->str, "Name") == 0) {
18272       if (ufp->choice == 1) {
18273         str = (CharPtr) ufp->data.ptrvalue;
18274         if (str != NULL) {
18275           name = str;
18276         }
18277       }
18278     } else if (StringICmp (oip->str, "DataSource") == 0) {
18279       if (ufp->choice == 1) {
18280         str = (CharPtr) ufp->data.ptrvalue;
18281         if (str != NULL) {
18282           source = str;
18283         }
18284       }
18285     } else if (StringICmp (oip->str, "Status") == 0) {
18286       if (ufp->choice == 1) {
18287         str = (CharPtr) ufp->data.ptrvalue;
18288         if (str != NULL) {
18289           if (StringICmp (str, "Official") == 0) {
18290             status = 1;
18291           } else if (StringICmp (str, "Interim") == 0) {
18292             status = 2;
18293           }
18294         }
18295       }
18296     }
18297   }
18298   if (symbol == NULL && name == NULL && source == NULL && status == 0) return;
18299 
18300   gnp = GeneNomenclatureNew ();
18301   if (gnp == NULL) return;
18302 
18303   gnp->status = status;
18304   gnp->symbol = StringSaveNoNull (symbol);
18305   gnp->name = StringSaveNoNull (name);
18306   gnp->source = DbtagParse (source);
18307 
18308   grp->formal_name = gnp;
18309 
18310   prev = (UserObjectPtr PNTR) &(sfp->ext);
18311   curr = sfp->ext;
18312   while (curr != NULL) {
18313     next = curr->next;
18314     if (uop == curr) {
18315       *(prev) = curr->next;
18316       curr->next = NULL;
18317       UserObjectFree (curr);
18318     } else {
18319       prev = (UserObjectPtr PNTR) &(curr->next);
18320     }
18321     curr = next;
18322   }
18323 }
18324 //LCOV_EXCL_STOP
18325 
18326 
18327 /* PCR_primer manipulation functions */
18328 
ParsePCRComponent(CharPtr strs)18329 static ValNodePtr ParsePCRComponent (
18330   CharPtr strs
18331 )
18332 
18333 {
18334   ValNodePtr  head = NULL;
18335   size_t      len;
18336   CharPtr     ptr, str, tmp;
18337 
18338   if (StringHasNoText (strs)) return NULL;
18339 
18340   tmp = StringSave (strs);
18341   if (tmp == NULL) return NULL;
18342 
18343   str = tmp;
18344   len = StringLen (str);
18345   if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) {
18346     str [len - 1] = '\0';
18347     str++;
18348   }
18349 
18350   while (StringDoesHaveText (str)) {
18351     ptr = StringChr (str, ',');
18352     if (ptr != NULL) {
18353       *ptr = '\0';
18354       ptr++;
18355     }
18356 
18357     TrimSpacesAroundString (str);
18358     ValNodeCopyStr (&head, 0, str);
18359 
18360     str = ptr;
18361   }
18362 
18363   MemFree (tmp);
18364   return head;
18365 }
18366 
ParsePCRStrings(CharPtr fwd_primer_seq,CharPtr rev_primer_seq,CharPtr fwd_primer_name,CharPtr rev_primer_name)18367 NLM_EXTERN ValNodePtr ParsePCRStrings (
18368   CharPtr fwd_primer_seq,
18369   CharPtr rev_primer_seq,
18370   CharPtr fwd_primer_name,
18371   CharPtr rev_primer_name
18372 )
18373 
18374 {
18375   ValNodePtr  curr_fwd_name;
18376   ValNodePtr  curr_fwd_seq;
18377   ValNodePtr  curr_rev_name;
18378   ValNodePtr  curr_rev_seq;
18379   CharPtr     fwd_name;
18380   CharPtr     fwd_seq;
18381   CharPtr     rev_name;
18382   CharPtr     rev_seq;
18383   ValNodePtr  fwd_name_list = NULL;
18384   ValNodePtr  fwd_seq_list = NULL;
18385   ValNodePtr  rev_name_list = NULL;
18386   ValNodePtr  rev_seq_list = NULL;
18387   ValNodePtr  head = NULL;
18388   Boolean     okay;
18389   Int2        orig_order = 0;
18390   PcrSetPtr   psp;
18391 
18392   fwd_seq_list = ParsePCRComponent (fwd_primer_seq);
18393   rev_seq_list = ParsePCRComponent (rev_primer_seq);
18394   fwd_name_list = ParsePCRComponent (fwd_primer_name);
18395   rev_name_list = ParsePCRComponent (rev_primer_name);
18396 
18397   curr_fwd_seq = fwd_seq_list;
18398   curr_rev_seq = rev_seq_list;
18399   curr_fwd_name = fwd_name_list;
18400   curr_rev_name = rev_name_list;
18401 
18402   while (curr_fwd_seq != NULL || curr_rev_seq != NULL || curr_fwd_name != NULL || curr_rev_name != NULL) {
18403     fwd_seq = NULL;
18404     rev_seq = NULL;
18405     fwd_name = NULL;
18406     rev_name = NULL;
18407     okay = FALSE;
18408 
18409     if (curr_fwd_seq != NULL) {
18410       fwd_seq = (CharPtr) curr_fwd_seq->data.ptrvalue;
18411       curr_fwd_seq = curr_fwd_seq->next;
18412       okay = TRUE;
18413     }
18414 
18415     if (curr_rev_seq != NULL) {
18416       rev_seq = (CharPtr) curr_rev_seq->data.ptrvalue;
18417       curr_rev_seq = curr_rev_seq->next;
18418       okay = TRUE;
18419     }
18420 
18421     if (curr_fwd_name != NULL) {
18422       fwd_name = (CharPtr) curr_fwd_name->data.ptrvalue;
18423       curr_fwd_name = curr_fwd_name->next;
18424       okay = TRUE;
18425     }
18426 
18427     if (curr_rev_name != NULL) {
18428       rev_name = (CharPtr) curr_rev_name->data.ptrvalue;
18429       curr_rev_name = curr_rev_name->next;
18430       okay = TRUE;
18431     }
18432 
18433     if (okay) {
18434       psp = (PcrSetPtr) MemNew (sizeof (PcrSet));
18435       if (psp != NULL) {
18436         psp->fwd_seq = StringSaveNoNull (fwd_seq);
18437         psp->rev_seq = StringSaveNoNull (rev_seq);
18438         psp->fwd_name = StringSaveNoNull (fwd_name);
18439         psp->rev_name = StringSaveNoNull (rev_name);
18440         orig_order++;
18441         psp->orig_order = orig_order;
18442         ValNodeAddPointer (&head, 0, (Pointer) psp);
18443       }
18444     }
18445   }
18446 
18447   ValNodeFreeData (fwd_seq_list);
18448   ValNodeFreeData (rev_seq_list);
18449   ValNodeFreeData (fwd_name_list);
18450   ValNodeFreeData (rev_name_list);
18451 
18452   return head;
18453 }
18454 
ParsePCRSet(BioSourcePtr biop)18455 NLM_EXTERN ValNodePtr ParsePCRSet (
18456   BioSourcePtr biop
18457 )
18458 
18459 {
18460   CharPtr       fwd_primer_seq = NULL;
18461   CharPtr       rev_primer_seq = NULL;
18462   CharPtr       fwd_primer_name = NULL;
18463   CharPtr       rev_primer_name = NULL;
18464   SubSourcePtr  ssp;
18465 
18466   if (biop == NULL) return NULL;
18467 
18468   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
18469     if (ssp->subtype == SUBSRC_fwd_primer_seq) {
18470       fwd_primer_seq = ssp->name;
18471     } else if (ssp->subtype == SUBSRC_rev_primer_seq) {
18472       rev_primer_seq = ssp->name;
18473     } else if (ssp->subtype == SUBSRC_fwd_primer_name) {
18474       fwd_primer_name = ssp->name;
18475     } else if (ssp->subtype == SUBSRC_rev_primer_name) {
18476       rev_primer_name = ssp->name;
18477     }
18478   }
18479 
18480   return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
18481 }
18482 
ParsePCRColonString(CharPtr strs)18483 static ValNodePtr ParsePCRColonString (
18484   CharPtr strs
18485 )
18486 
18487 {
18488   ValNodePtr  head = NULL;
18489   size_t      len;
18490   CharPtr     ptr, str, tmp;
18491 
18492   if (StringHasNoText (strs)) return NULL;
18493 
18494   tmp = StringSave (strs);
18495   str = tmp;
18496   len = StringLen (str);
18497   if (len > 1 && StringChr (str, ':') != NULL) {
18498     while (StringDoesHaveText (str)) {
18499       ptr = StringChr (str, ':');
18500       if (ptr != NULL) {
18501         *ptr = '\0';
18502         ptr++;
18503       }
18504       TrimSpacesAroundString (str);
18505       ValNodeCopyStr (&head, 0, str);
18506       str = ptr;
18507     }
18508   } else {
18509     ValNodeCopyStr (&head, 0, str);
18510   }
18511 
18512   MemFree (tmp);
18513   return head;
18514 }
18515 
18516 //LCOV_EXCL_START
/*
 * FusePrimerNames
 *
 * Joins two primer names as "first:second".
 *
 * Ownership of the result depends on the inputs:
 *   - first is NULL  -> second itself is returned (no copy is made)
 *   - second is NULL -> first itself is returned (no copy is made)
 *   - otherwise      -> a buffer newly obtained from MemNew is returned,
 *                       or NULL if that allocation fails
 * Callers must therefore compare the result with their arguments before
 * freeing anything.
 */
static CharPtr FusePrimerNames(
  CharPtr first,
  CharPtr second
)

{
  CharPtr  fused;
  size_t   needed;

  if (first == NULL) return second;
  if (second == NULL) return first;

  /* room for both names, the ':' separator and the terminator, with slack */
  needed = StringLen (first) + StringLen (second) + 5;
  fused = (CharPtr) MemNew (needed);
  if (fused == NULL) return NULL;

  sprintf (fused, "%s:%s", first, second);

  return fused;
}
18539 
ModernizePCRPrimerHalf(CharPtr seq,CharPtr name)18540 static PCRPrimerPtr ModernizePCRPrimerHalf (
18541   CharPtr seq,
18542   CharPtr name
18543 )
18544 
18545 {
18546   CharPtr       curr_name = NULL, curr_seq = NULL, fused_name;
18547   PCRPrimerPtr  curr_primer = NULL, last_primer = NULL, primer_set = NULL;
18548   ValNodePtr    name_list, seq_list, name_vnp, seq_vnp;
18549 
18550   seq_list = ParsePCRColonString (seq);
18551   name_list = ParsePCRColonString (name);
18552 
18553   seq_vnp = seq_list;
18554   name_vnp = name_list;
18555 
18556   while (seq_vnp != NULL /* || name_vnp != NULL */) {
18557     if (seq_vnp != NULL) {
18558       curr_seq = (CharPtr) seq_vnp->data.ptrvalue;
18559       seq_vnp = seq_vnp->next;
18560     }
18561     if (name_vnp != NULL) {
18562       curr_name = (CharPtr) name_vnp->data.ptrvalue;
18563       name_vnp = name_vnp->next;
18564     } else {
18565       curr_name = NULL;
18566     }
18567 
18568     curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
18569     if (curr_primer != NULL) {
18570       curr_primer->seq = StringSaveNoNull (curr_seq);
18571       curr_primer->name = StringSaveNoNull (curr_name);
18572 
18573       if (primer_set == NULL) {
18574         primer_set = curr_primer;
18575       }
18576       if (last_primer != NULL) {
18577         last_primer->next = curr_primer;
18578       }
18579       last_primer = curr_primer;
18580     }
18581   }
18582 
18583   while (name_vnp != NULL && last_primer != NULL) {
18584     curr_name = (CharPtr) name_vnp->data.ptrvalue;
18585     fused_name = FusePrimerNames (last_primer->name, curr_name);
18586     MemFree (last_primer->name);
18587     last_primer->name = StringSaveNoNull (fused_name);
18588     name_vnp = name_vnp->next;
18589   }
18590 
18591   while (name_vnp != NULL && last_primer == NULL) {
18592     curr_name = (CharPtr) name_vnp->data.ptrvalue;
18593     curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
18594     if (curr_primer != NULL) {
18595       curr_primer->name = StringSaveNoNull (curr_name);
18596 
18597       if (primer_set == NULL) {
18598         primer_set = curr_primer;
18599       }
18600       if (last_primer != NULL) {
18601         last_primer->next = curr_primer;
18602       }
18603       last_primer = curr_primer;
18604     }
18605     name_vnp = name_vnp->next;
18606   }
18607 
18608   ValNodeFreeData (seq_list);
18609   ValNodeFreeData (name_list);
18610 
18611   return primer_set;
18612 }
18613 
ModernizePCRPrimers(BioSourcePtr biop)18614 NLM_EXTERN void ModernizePCRPrimers (
18615   BioSourcePtr biop
18616 )
18617 
18618 {
18619   PCRReactionSetPtr  curr_reaction, last_reaction = NULL, reaction_set = NULL;
18620   PCRPrimerPtr       forward, reverse;
18621   PcrSetPtr          psp;
18622   ValNodePtr         pset, vnp;
18623   SubSourcePtr       nextssp;
18624   SubSourcePtr PNTR  prevssp;
18625   SubSourcePtr       ssp;
18626   Boolean            unlink;
18627 
18628   if (biop == NULL) return;
18629   /* if (biop->pcr_primers != NULL) return; */
18630 
18631   pset = ParsePCRSet (biop);
18632   if (pset == NULL) return;
18633 
18634   for (vnp = pset; vnp != NULL; vnp = vnp->next) {
18635     psp = (PcrSetPtr) vnp->data.ptrvalue;
18636     if (psp == NULL) continue;
18637 
18638     forward = ModernizePCRPrimerHalf (psp->fwd_seq, psp->fwd_name);
18639     reverse = ModernizePCRPrimerHalf (psp->rev_seq, psp->rev_name);
18640 
18641     if (forward != NULL || reverse != NULL) {
18642 
18643       curr_reaction = (PCRReactionSetPtr) MemNew (sizeof (PCRReactionSet));
18644       if (curr_reaction != NULL) {
18645         curr_reaction->forward = forward;
18646         curr_reaction->reverse = reverse;
18647 
18648         if (reaction_set == NULL) {
18649           reaction_set = curr_reaction;
18650         }
18651         if (last_reaction != NULL) {
18652           last_reaction->next = curr_reaction;
18653         }
18654         last_reaction = curr_reaction;
18655       }
18656     }
18657   }
18658 
18659   FreePCRSet (pset);
18660 
18661   if (reaction_set != NULL) {
18662     if (last_reaction != NULL) {
18663       /* merge with existing structured pcr_primers */
18664       last_reaction->next = biop->pcr_primers;
18665     }
18666     biop->pcr_primers = reaction_set;
18667 
18668     ssp = biop->subtype;
18669     prevssp = (SubSourcePtr PNTR) &(biop->subtype);
18670     while (ssp != NULL) {
18671       nextssp = ssp->next;
18672       unlink= FALSE;
18673 
18674       if (ssp->subtype == SUBSRC_fwd_primer_seq ||
18675           ssp->subtype == SUBSRC_rev_primer_seq ||
18676           ssp->subtype == SUBSRC_fwd_primer_name ||
18677           ssp->subtype == SUBSRC_rev_primer_name) {
18678         unlink = TRUE;
18679       }
18680 
18681       if (unlink) {
18682         *prevssp = ssp->next;
18683         ssp->next = NULL;
18684         SubSourceFree (ssp);
18685       } else {
18686         prevssp = (SubSourcePtr PNTR) &(ssp->next);
18687       }
18688       ssp = nextssp;
18689     }
18690   }
18691 }
18692 //LCOV_EXCL_STOP
18693 
PubIsEffectivelyEmpty(PubdescPtr pdp)18694 NLM_EXTERN Boolean PubIsEffectivelyEmpty (PubdescPtr pdp)
18695 
18696 {
18697   ValNodePtr  vnp;
18698 
18699   if (pdp == NULL) return FALSE;
18700   vnp = pdp->pub;
18701   if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
18702     if (empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
18703       return TRUE;
18704     }
18705   }
18706   return FALSE;
18707 }
18708 
18709 
18710 
18711 
18712