1 /*   sqnutils.h
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  sqnutils.h
27 *
28 * Author:  Jonathan Kans
29 *
30 * Version Creation Date:   9/2/97
31 *
32 * $Revision: 6.747 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date     Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 
45 #ifndef _SQNUTILS_
46 #define _SQNUTILS_
47 
48 #include <ncbi.h>
49 #include <sequtil.h>
50 #include <objpubme.h>
51 #include <objentgene.h>
52 #include <util/creaders/alnread.h>
53 #include <subutil.h>
54 
55 #undef NLM_EXTERN /* drop any earlier definition before choosing the one used by the prototypes below */
56 #ifdef NLM_IMPORT
57 #define NLM_EXTERN NLM_IMPORT /* NLM_IMPORT comes from elsewhere; its expansion is not visible in this header */
58 #else
59 #define NLM_EXTERN extern /* fallback when NLM_IMPORT is not defined: ordinary external linkage */
60 #endif /* NLM_IMPORT */
61 
62 #ifdef __cplusplus
63 extern "C" {
64 #endif
65 
66 typedef  void  (*Nlm_ChangeNotifyProc) PROTO ((Pointer)); /* pointer to a function that takes one Pointer and returns nothing; PROTO is a macro defined elsewhere */
67 
68 NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID);
69 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForData (Uint2 entityID, BioseqPtr bsp);
70 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemID (Uint2 entityID, Uint4 itemID, Uint2 itemtype);
71 
72 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForDataEx (Uint2 entityID, BioseqPtr bsp, Boolean skipGenProdSet);
73 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemIDEx (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Boolean skipGenProdSet);
74 
75 NLM_EXTERN SeqIdPtr SeqIdFindWorst (SeqIdPtr sip);
76 NLM_EXTERN void ChangeSeqIdToWorstID (SeqIdPtr sip);
77 NLM_EXTERN void ChangeSeqLocToWorstID (SeqLocPtr slp);
78 
79 NLM_EXTERN SeqIdPtr MakeSeqID (CharPtr str);
80 NLM_EXTERN SeqIdPtr MakeUniqueSeqID (CharPtr prefix);
81 
82 NLM_EXTERN DatePtr DateAdvance (DatePtr dp, Uint1 monthsToAdd);
83 
84 NLM_EXTERN SeqEntryPtr LIBCALL FindNthSeqEntry (SeqEntryPtr sep, Int2 seq);
85 NLM_EXTERN SeqEntryPtr LIBCALL FindNthBioseq (SeqEntryPtr sep, Int2 seq);
86 NLM_EXTERN SeqEntryPtr LIBCALL FindNthSequinEntry (SeqEntryPtr sep, Int2 seq);
87 NLM_EXTERN SeqEntryPtr LIBCALL FindNucSeqEntry (SeqEntryPtr sep);
88 NLM_EXTERN BioseqPtr LIBCALL FindNucBioseq (SeqEntryPtr sep);
89 NLM_EXTERN SeqEntryPtr LIBCALL FindBioseqSetByClass (SeqEntryPtr sep, Uint1 _class);
90 
91 NLM_EXTERN Boolean LIBCALL SeqEntryHasNucs (SeqEntryPtr sep);
92 NLM_EXTERN Boolean LIBCALL SeqEntryHasProts (SeqEntryPtr sep);
93 NLM_EXTERN Boolean LIBCALL SeqEntryHasAligns (Uint2 entityID, SeqEntryPtr sep);
94 NLM_EXTERN Boolean LIBCALL PowerBLASTASN1Detected (SeqEntryPtr sep);
95 
96 NLM_EXTERN Int2 EntityIDToGeneticCode (Uint2 entityID, BoolPtr mito, CharPtr taxname, size_t maxsize);
97 NLM_EXTERN Int2 SeqEntryToGeneticCode (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize);
98 NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize, BioSourcePtr PNTR biopp);
99 
100 NLM_EXTERN Boolean BioseqToGeneticCode (
101   BioseqPtr bsp,
102   Int2Ptr gencodep,
103   BoolPtr mitop,
104   BoolPtr plastidp,
105   CharPtr taxnamep,
106   size_t maxsize,
107   BioSourcePtr PNTR biopp
108 );
109 
110 NLM_EXTERN SeqLocPtr   CreateWholeInterval (SeqEntryPtr sep);
111 NLM_EXTERN SeqFeatPtr  CreateNewFeature (SeqEntryPtr sep, SeqEntryPtr placeHere, Uint1 choice, SeqFeatPtr useThis);
112 NLM_EXTERN ValNodePtr  CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice);
113 
114 NLM_EXTERN SeqLocPtr WholeIntervalFromSeqId (SeqIdPtr sip);
115 
116 NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class);
117 
118 /* Variants that call SeqMgrGetSeqEntryForData. The feature version allows a location
119 to be specified, overriding the default full-length seq-int location.  (If location is
120 not NULL, it copies it after deleting the existing sfp->location.)  For both functions
121 you still need to set the sfp->data.value.ptrvalue or the sdp->data.ptrvalue. */
122 NLM_EXTERN SeqFeatPtr CreateNewFeatureOnBioseq (BioseqPtr bsp, Uint1 choice, SeqLocPtr slp);
123 NLM_EXTERN ValNodePtr CreateNewDescriptorOnBioseq (BioseqPtr bsp, Uint1 choice);
124 
125 NLM_EXTERN void        UpdateLocalId (BioseqPtr bsp, CharPtr localId);
126 NLM_EXTERN void        UpdateTitle (BioseqPtr bsp, CharPtr title);
127 
128 NLM_EXTERN GeneRefPtr  CreateNewGeneRef (CharPtr locus, CharPtr allele,
129                                      CharPtr desc, Boolean pseudo);
130 NLM_EXTERN ProtRefPtr  CreateNewProtRef (CharPtr name, CharPtr desc,
131                                      CharPtr ec, CharPtr activity);
132 NLM_EXTERN CdRegionPtr CreateNewCdRgn (Uint1 frame, Boolean orf, Int2 genCode);
133 
134 NLM_EXTERN void        SetSeqFeatData (SeqFeatPtr sfp, Pointer data);
135 NLM_EXTERN void        SetSeqFeatProduct (SeqFeatPtr sfp, BioseqPtr bsp);
136 NLM_EXTERN void        ResetSeqFeatInterval (SeqFeatPtr sfp);
137 
138 NLM_EXTERN void        AddSeqFeatInterval (SeqFeatPtr sfp, BioseqPtr bsp, Int4 from, Int4 to,
139                                        Boolean partial5, Boolean partial3);
140 
141 NLM_EXTERN void        AddSeqLocPoint (SeqLocPtr PNTR old_slp, SeqIdPtr sip, Int4 location,
142                                        Boolean fuzz_before, Boolean fuzz_after, Int2 strand);
143 NLM_EXTERN void        AddSeqFeatPoint (SeqFeatPtr sfp, BioseqPtr bsp, Int4 location, Boolean fuzz_before, Boolean fuzz_after, Int2 strand);
144 
145 /* AddSeqEntryToSeqEntry and ReplaceSeqEntryWithSeqEntry leave
146    the original target sep pointing to the new structure. */
147 
148 NLM_EXTERN void        AddSeqEntryToSeqEntry (SeqEntryPtr target, SeqEntryPtr insert, Boolean relink);
149 NLM_EXTERN void        ReplaceSeqEntryWithSeqEntry (SeqEntryPtr target, SeqEntryPtr replaceWith, Boolean relink);
150 
151 NLM_EXTERN void        RemoveSeqEntryFromSeqEntry (SeqEntryPtr top, SeqEntryPtr del, Boolean relink);
152 NLM_EXTERN Int4        RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink);
153 NLM_EXTERN Int4        RemoveSingleItemSet (SeqEntryPtr sep, Boolean relink);
154 
155 /* The following functions are called by the above when relink is TRUE.  Examine the
156    code of ReplaceSeqEntryWithSeqEntry (in dlgutil2.c) to see how relink is treated. */
157 
158 NLM_EXTERN void        GetSeqEntryParent (SeqEntryPtr target, Pointer PNTR parentptr, Uint2Ptr parenttype);
159 
160 NLM_EXTERN void        SaveSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr PNTR omdptopptr, ObjMgrData PNTR omdataptr);
161 NLM_EXTERN void        RestoreSeqEntryObjMgrData (SeqEntryPtr target, ObjMgrDataPtr omdptop, ObjMgrData PNTR omdataptr);
162 
163 /* If relink is FALSE, call SeqMgrLinkSeqEntry (target, parenttype, parentptr)
164    with the original parent after all sequences have been added to the target. */
165 
166 /* If relink is FALSE, call SaveSeqEntryObjMgrData with the addresses of temporary
167    ObjMgrDataPtr and ObjMgrData variables, and after calling SeqMgrLinkSeqEntry to
168    update the link table, call RestoreSeqEntryObjMgrData with the value of the
169    temporary ObjMgrDataPtr and the address of the ObjMgrData variable. */
170 
171 /* ExtractBioSourceAndPubs and ReplaceBioSourceAndPubs can be called before and
172    after AddSeqEntryToSeqEntry to propagate source and pub descriptors to top level. */
173 
174 NLM_EXTERN ValNodePtr  ExtractBioSourceAndPubs (SeqEntryPtr sep);
175 NLM_EXTERN void        ReplaceBioSourceAndPubs (SeqEntryPtr sep, ValNodePtr descr);
176 
177 /* SeqLocMerge combines feature intervals.  It can be used to extend the gene feature
178    intervals, and (eventually) to fuse multiple features into one. */
179 
180 NLM_EXTERN SeqLocPtr SeqLocMerge (BioseqPtr target,
181                                   SeqLocPtr to, SeqLocPtr from,
182                                   Boolean single_interval, Boolean fuse_joints,
183                                   Boolean add_null);
184 
185 NLM_EXTERN SeqLocPtr SeqLocMergeEx (BioseqPtr target, SeqLocPtr to, SeqLocPtr from,
186                                     Boolean single_interval, Boolean fuse_joints,
187                                     Boolean merge_overlaps, Boolean add_null);
188 
189 NLM_EXTERN SeqLocPtr SeqLocMergeExEx (
190   BioseqPtr target,
191   SeqLocPtr to,
192   SeqLocPtr from,
193   Boolean single_interval,
194   Boolean fuse_joints,
195   Boolean merge_overlaps,
196   Boolean add_null,
197   Boolean ignore_mixed,
198   Boolean ignore_out_of_order,
199   Boolean relaxed
200 );
201 
202 NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr);
203 NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3);
204 NLM_EXTERN void FreeAllFuzz (SeqLocPtr location);
205 NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location);
206 NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location);
207 NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location);
208 NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp);
209 NLM_EXTERN Boolean SeqLocBadSortOrder (BioseqPtr bsp, SeqLocPtr slp);
210 NLM_EXTERN Boolean SeqLocMixedStrands (BioseqPtr bsp, SeqLocPtr slp);
211 /* CheckSeqLocForPartialEx and SetSeqLocPartialEx take an extra lim argument, using the Int-fuzz lim codes - 3 is tr (space to right), 4 is tl (space to left) */
212 NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr);
213 NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim);
214 
215 /* GetBioseqGivenSeqLoc returns a segmented bioseq if the SeqLoc is to the parts */
216 
217 NLM_EXTERN BioseqPtr GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID);
218 
219 NLM_EXTERN BioseqPtr GetBioseqGivenIDs (Uint2 entityID, Uint4 itemID, Uint2 itemtype);
220 NLM_EXTERN Uint4 GetItemIDGivenPointer (Uint2 entityID, Uint2 itemtype, Pointer lookfor);
221 
222 NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype);
223 NLM_EXTERN Uint1 FindFeatDefTypeFromKey (CharPtr key);
224 NLM_EXTERN CharPtr FindKeyFromFeatDefType (Uint1 type, Boolean forGBFF);
225 
226 NLM_EXTERN Uint1 CodonToGcIndex (CharPtr codon);
227 NLM_EXTERN CharPtr GcIndextoCodon (Uint1 index);
228 
229 NLM_EXTERN GBQualPtr SortFeatureGBQuals (GBQualPtr list);
230 NLM_EXTERN void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq);
231 
232 /* finds bioseq from (cds) product, gets largest protein feature packaged on it */
233 
234 NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product);
235 
236 /* set coding region partial flags by initial dash and final star in translation */
237 
238 NLM_EXTERN void CodingRegionPartialsFromTranslation (SeqEntryPtr sep);
239 
240 /* impose coding region partial flags onto appropriate mRNA and gene features */
241 
242 NLM_EXTERN void ImposeCodingRegionPartials (SeqEntryPtr sep);
243 
244 /* resynchronizes coding regions with product protein bioseq molinfo and protein feature */
245 
246 NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep);
247 NLM_EXTERN Boolean ResynchCodingRegionPartialsEx (SeqEntryPtr sep, FILE *log_fp);
248 
249 /* resynchronizes mRNAs with product cDNA bioseq */
250 
251 NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep);
252 
253 /* resynchronizes protein feature with product peptide bioseq */
254 
255 NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep);
256 
257 /* individual feature callbacks for above functions */
258 
259 NLM_EXTERN void CDSPartialsFromTranslation (SeqFeatPtr sfp, Pointer userdata);
260 NLM_EXTERN void ImposeCDSPartials (SeqFeatPtr sfp, Pointer userdata);
261 NLM_EXTERN void ImposeGenePartials (SeqFeatPtr sfp, Pointer userdata);
262 NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata);
263 NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata);
264 NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata);
265 
266 /* functions for associating CDS and parent mRNA using featureIDs */
267 
268 NLM_EXTERN void ClearFeatIDs (SeqFeatPtr sfp);
269 NLM_EXTERN void ClearFeatIDXrefs (SeqFeatPtr sfp);
270 
271 NLM_EXTERN void ClearFeatureIDs (SeqEntryPtr sep);
272 NLM_EXTERN Int4 FindHighestFeatureID (SeqEntryPtr sep);
273 
274 NLM_EXTERN void AssignFeatureIDs (SeqEntryPtr sep);
275 NLM_EXTERN void AssignFeatureIDsWithOffset (SeqEntryPtr sep, Int4Ptr last_used_id, Int4Ptr last_used_ref);
276 
277 NLM_EXTERN void OffsetFeatureIDs (SeqEntryPtr sep, Int4 offset);
278 NLM_EXTERN void OffsetFeatureIDXrefs (SeqEntryPtr sep, Int4 offset);
279 
280 NLM_EXTERN void ReassignFeatureIDs (SeqEntryPtr sep);
281 
282 NLM_EXTERN void LinkCDSmRNAbyOverlap (SeqEntryPtr sep);
283 NLM_EXTERN void LinkCDSmRNAbyProduct (SeqEntryPtr sep);
284 NLM_EXTERN void LinkCDSmRNAbyLabel (SeqEntryPtr sep);
285 NLM_EXTERN void LinkCDSmRNAbyLabelAndLocation (SeqEntryPtr sep);
286 
287 NLM_EXTERN void StripFeatIDXrefAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
288 NLM_EXTERN void StripSeqDataGapAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
289 NLM_EXTERN void StripNewFeatMolInfoFieldsAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
290 NLM_EXTERN void StripPCRPrimerAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
291 NLM_EXTERN void StripOrgNamePgcodeAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
292 NLM_EXTERN void StripGeneRnaPcrAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
293 NLM_EXTERN void StripSeqFeatSupportAsnFilter (AsnIoPtr aip, AsnIoPtr aop);
294 
295 /* functions to parse [org=Drosophila melanogaster] and [gene=lacZ] from titles */
296 /* for example, passing "gene" to SqnTagFind returns "lacZ" */
297 
298 #define MAX_SQN_TAGS  200
299 
300 typedef struct sqntag { /* result of SqnTagParse; the three arrays each have MAX_SQN_TAGS slots */
301   CharPtr  query; /* NOTE(review): presumably the parsed copy of the title that tag/val refer to - confirm in SqnTagParse */
302   Int2     num_tags; /* presumably the number of array slots in use, at most MAX_SQN_TAGS - confirm */
303   CharPtr  tag [MAX_SQN_TAGS]; /* presumably the bracketed tag names, e.g. "gene" from [gene=lacZ] */
304   CharPtr  val [MAX_SQN_TAGS]; /* presumably the value paired with the tag at the same index, e.g. "lacZ" */
305   Boolean  used [MAX_SQN_TAGS]; /* presumably marks entries already handed out, cf. SqnTagFindUnused - confirm */
306 } SqnTag, PNTR SqnTagPtr;
307 
308 NLM_EXTERN SqnTagPtr SqnTagParse (CharPtr ttl);
309 NLM_EXTERN SqnTagPtr SqnTagFree (SqnTagPtr stp);
310 
311 NLM_EXTERN CharPtr SqnTagFind (SqnTagPtr stp, CharPtr tag);
312 NLM_EXTERN ValNodePtr SqnTagFindMultiple (SqnTagPtr stp, CharPtr tag);
313 NLM_EXTERN CharPtr SqnTagFindUnused (SqnTagPtr stp, CharPtr tag);
314 
315 NLM_EXTERN void ReadTechFromString (CharPtr str, MolInfoPtr mip);
316 NLM_EXTERN void ReadCompletenessFromString (CharPtr str, MolInfoPtr mip);
317 
318 extern Boolean StringsAreEquivalent (CharPtr str1, CharPtr str2); /* NOTE(review): declared with plain extern, unlike the NLM_EXTERN prototypes around it - confirm this is intentional */
319 NLM_EXTERN Uint1 EquivalentSubSource (CharPtr str);
320 NLM_EXTERN Uint1 EquivalentOrgMod (CharPtr str);
321 NLM_EXTERN Uint1 EquivalentSubSourceEx (CharPtr str, Boolean allow_discouraged_and_discontinued);
322 NLM_EXTERN Uint1 EquivalentOrgModEx (CharPtr str, Boolean allow_discouraged_and_discontinued);
323 
324 /* functions to extract BioSource, MolInfo, and Bioseq information from parsed titles */
325 
326 NLM_EXTERN BioSourcePtr ParseTitleIntoBioSource (
327   SqnTagPtr stp,
328   CharPtr organism,
329   BioSourcePtr biop
330 );
331 
332 NLM_EXTERN MolInfoPtr ParseTitleIntoMolInfo (
333   SqnTagPtr stp,
334   MolInfoPtr mip
335 );
336 
337 NLM_EXTERN BioseqPtr ParseTitleIntoBioseq (
338   SqnTagPtr stp,
339   BioseqPtr bsp
340 );
341 
342 NLM_EXTERN GeneRefPtr ParseTitleIntoGeneRef (
343   SqnTagPtr stp,
344   GeneRefPtr grp
345 );
346 
347 NLM_EXTERN ProtRefPtr ParseTitleIntoProtRef (
348   SqnTagPtr stp,
349   ProtRefPtr prp
350 );
351 
352 NLM_EXTERN GBBlockPtr ParseTitleIntoGenBank (
353   SqnTagPtr stp,
354   GBBlockPtr gbp
355 );
356 
357 NLM_EXTERN SeqHistPtr ParseTitleIntoSeqHist (
358   SqnTagPtr stp,
359   SeqHistPtr shp
360 );
361 
362 NLM_EXTERN SeqHistPtr ParseStringIntoSeqHist (
363   SeqHistPtr shp,
364   CharPtr str
365 );
366 
367 NLM_EXTERN void ParseTitleIntoSubmitBlock (
368   SqnTagPtr stp,
369   SubmitBlockPtr sbp
370 );
371 
372 NLM_EXTERN UserObjectPtr ParseTitleIntoTpaAssembly (
373   SqnTagPtr stp,
374   UserObjectPtr uop
375 );
376 
377 NLM_EXTERN UserObjectPtr ParseTitleIntoGenomeProjectsDB (
378   SqnTagPtr stp,
379   UserObjectPtr uop
380 );
381 
382 NLM_EXTERN void AddFieldStringToDbLinkUserObject (
383   CharPtr str,
384   CharPtr field_name,
385   UserObjectPtr uop
386 );
387 
388 NLM_EXTERN UserObjectPtr ParseTitleIntoDBLinkBioProject (
389   SqnTagPtr stp,
390   UserObjectPtr uop
391 );
392 
393 NLM_EXTERN UserObjectPtr ParseTitleIntoDBLinkBioSample (
394   SqnTagPtr stp,
395   UserObjectPtr uop
396 );
397 
398 NLM_EXTERN UserObjectPtr ParseTitleIntoDBLinkSeqReadArch (
399   SqnTagPtr stp,
400   UserObjectPtr uop
401 );
402 
403 NLM_EXTERN Boolean IsGenomeProjectIDDescriptor (SeqDescrPtr sdp);
404 NLM_EXTERN SeqDescrPtr GetGenomeProjectIDDescriptor (BioseqPtr bsp);
405 NLM_EXTERN Int4 GetGenomeProjectID (BioseqPtr bsp);
406 
407 NLM_EXTERN CharPtr GetTSAIDDB (BioseqPtr bsp);
408 
409 
410 NLM_EXTERN void AddPubsFromTitle (
411   SqnTagPtr stp,
412   SeqDescrPtr PNTR desc_list
413 );
414 
415 /* structured comment user object for flatfile presentation */
416 
417 NLM_EXTERN UserObjectPtr ParseStringIntoStructuredComment (
418   UserObjectPtr uop,
419   CharPtr str,
420   CharPtr prefix,
421   CharPtr suffix
422 );
423 
424 
425 /* UseLocalAsnloadDataAndErrMsg transiently sets paths to asnload, data, and errmsg
426   if they are packaged in the same directory as the executing program. */
427 
428 NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void);
429 
430 /* GetRidOfLocusInSeqIds strips locus from all feature location and product seqIds */
431 
432 NLM_EXTERN void GetRidOfLocusInSeqIds (Uint2 entityID, SeqEntryPtr sep);
433 
434 NLM_EXTERN SeqLocPtr StripLocusFromSeqLoc (SeqLocPtr location);
435 NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip);
436 
437 /* LeaveBestCDD removes all but best CDD region in an area of overlapping features */
438 
439 NLM_EXTERN void LeaveBestCDD (SeqEntryPtr sep);
440 
441 /* ConvertPubSrcComDescsToFeats is useful when merging records */
442 
443 NLM_EXTERN Boolean ConvertPubSrcComDescsToFeats (SeqEntryPtr sep, Boolean pub, Boolean src, Boolean com, Boolean toProts, Boolean PNTR asked_about_prop, Boolean PNTR propagate_descriptions, CharPtr findstring);
444 
445 NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
446 
447 NLM_EXTERN Uint1 FindTrnaAA (CharPtr str);
448 NLM_EXTERN Uint1 FindTrnaAA3 (CharPtr str);
449 NLM_EXTERN CharPtr GetLongSymbolForAA (Char aa);
450 NLM_EXTERN Uint1 ParseTRnaString (CharPtr strx, BoolPtr justTrnaText, Uint1Ptr codon, Boolean noSingleLetter);
451 NLM_EXTERN CharPtr FindTrnaAAIndex (CharPtr str);
452 NLM_EXTERN Char FindResidueByName (CharPtr res_name, SeqCodeTablePtr sctp);
453 NLM_EXTERN ValNodePtr TokenizeTRnaString (CharPtr strx);
454 NLM_EXTERN Boolean ParseDegenerateCodon (tRNAPtr trp, Uint1Ptr codon);
455 NLM_EXTERN Boolean SerialNumberInString (CharPtr str);
456 
457 /* ModernizeRNAFields uses new RNAGenPtr choice of RnaRef.ext for misc_RNA, ncRNA, tmRNA */
458 
459 NLM_EXTERN void ModernizeRNAFields (
460   SeqFeatPtr sfp
461 );
462 
463 /* ModernizeGeneFields populates new GeneNomenclaturePtr field from OfficialNomenclature user object */
464 
465 NLM_EXTERN void ModernizeGeneFields (
466   SeqFeatPtr sfp
467 );
468 
469 /* for sorting and uniquing valnode list by (charptr) data.ptrvalue (with case sensitive/insensitive variants) */
470 
471 NLM_EXTERN int LIBCALLBACK SortVnpByStringCS (VoidPtr ptr1, VoidPtr ptr2);
472 NLM_EXTERN int LIBCALLBACK SortVnpByStringCI (VoidPtr ptr1, VoidPtr ptr2);
473 NLM_EXTERN int LIBCALLBACK SortVnpByStringCIUCFirst (VoidPtr ptr1, VoidPtr ptr2);
474 NLM_EXTERN int LIBCALLBACK SortVnpByStringCILCFirst (VoidPtr ptr1, VoidPtr ptr2);
475 
476 NLM_EXTERN ValNodePtr UniqueStringValNodeCS (ValNodePtr list);
477 NLM_EXTERN ValNodePtr UniqueStringValNodeCI (ValNodePtr list);
478 
479 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCS (VoidPtr ptr1, VoidPtr ptr2);
480 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCI (VoidPtr ptr1, VoidPtr ptr2);
481 
482 NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2);
483 NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list);
484 
485 /* for sorting valnode list by choice */
486 
487 NLM_EXTERN int LIBCALLBACK SortByChoice (VoidPtr ptr1, VoidPtr ptr2);
488 
489 /* for sorting and uniquing valnode list by data.intvalue */
490 
491 NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2);
492 NLM_EXTERN ValNodePtr UniqueIntValNode (ValNodePtr list);
493 
494 /* for sorting and uniquing valnode list by data.ptrvalue */
495 
496 NLM_EXTERN int LIBCALLBACK SortByPtrvalue (VoidPtr ptr1, VoidPtr ptr2);
497 NLM_EXTERN ValNodePtr UniquePtrValNode (ValNodePtr list);
498 
499 /* keytag sorts/uniques and then owns valnode character list */
500 
501 typedef struct keytag { /* set up by KeyTagInit and released by KeyTagClear, both taking a KeyTag PNTR */
502   Int2               num; /* presumably the number of entries in index - confirm in KeyTagInit */
503   ValNodePtr         list; /* the valnode character list passed to KeyTagInit, which the keytag then owns */
504   CharPtr PNTR       index; /* elements point into above valnode list */
505 } KeyTag;                   /* used as substructure, not allocated separately */
506 
507 NLM_EXTERN void KeyTagInit (KeyTag PNTR ktp, ValNodePtr list);
508 NLM_EXTERN void KeyTagClear (KeyTag PNTR ktp);
509 
510 NLM_EXTERN Int2 KeyFromTag (KeyTag PNTR ktp, CharPtr tag);
511 NLM_EXTERN CharPtr TagFromKey (KeyTag PNTR ktp, Int2 key);
512 
513 /* inference qualifier utility */
514 
515 #define VALID_INFERENCE            0 /* these codes are presumably the Int2 results of ValidateInferenceQualifier, with 0 meaning no problem found - confirm in the implementation */
516 #define EMPTY_INFERENCE_STRING     1
517 #define BAD_INFERENCE_PREFIX       2
518 #define BAD_INFERENCE_BODY         3
519 #define SINGLE_INFERENCE_FIELD     4
520 #define SPACES_IN_INFERENCE        5
521 #define INFERENCE_HAS_COMMENT      6
522 #define SAME_SPECIES_MISUSED       7
523 #define BAD_INFERENCE_ACCESSION    8
524 #define BAD_INFERENCE_ACC_VERSION  9
525 #define ACC_VERSION_NOT_PUBLIC    10
526 #define BAD_ACCESSION_TYPE        11
527 #define UNRECOGNIZED_DATABASE     12 /* highest code defined in this list */
528 
529 NLM_EXTERN Int2 ValidateInferenceQualifier (CharPtr val, Boolean fetchAccn);
530 
531 
532 /* from Colombe */
533 NLM_EXTERN SeqLocPtr StringSearchInBioseq (SeqIdPtr sip, CharPtr sub);
534 
535 /*****************************************************************************
536 *
537 *   SequinEntryList (sep, mydata, mycallback, index, indent)
538 *       traverses all Seq-entry nodes beginning with sep
539 *       calls mycallback () at each node
540 *       Does enter BioseqSets of _class "parts", but ignores the
541 *       parts set itself
542 *
543 *****************************************************************************/
544 
545 NLM_EXTERN Int4 SequinEntryList (SeqEntryPtr sep, Pointer mydata, SeqEntryFunc mycallback, Int4 index, Int2 indent);
546 
547 #define SequinEntryCount( a )  SequinEntryList( a ,NULL,NULL,0,0) /* SequinEntryList on a with no user data and no callback, index 0, indent 0; presumably used for its Int4 result - confirm */
548 #define SequinEntryExplore(a,b,c) SequinEntryList(a, b, c, 0L, 0) /* SequinEntryList on a with b as mydata and c as mycallback, index 0, indent 0 */
549 
550 /* Phrap reading function, based on sample code supplied by C. Magness, returns a SeqEntry list
551 of Bioseqs containing SeqGraphs, with individual reads removed and only contigs remaining */
552 
553 NLM_EXTERN SeqEntryPtr ReadPhrapFile (FILE *fp);
554 
555 /* Internal function to read quality scores, made available to parse separate DNA and quality score files */
556 
557 NLM_EXTERN SeqGraphPtr ReadPhrapQuality (FILE *fp, BioseqPtr bsp);
558 NLM_EXTERN SeqGraphPtr ReadPhrapQualityFC (FileCachePtr fcp, BioseqPtr bsp);
559 
560 /* SetPhrapContigOrder takes the results of ReadPhrapFile and a string indicating the order
561 of contigs, and returns a SeqEntryList in the desired order, with all other contigs removed */
562 
563 NLM_EXTERN SeqEntryPtr SetPhrapContigOrder (SeqEntryPtr head, CharPtr contigs);
564 
565 NLM_EXTERN void PrintQualityScores (BioseqPtr bsp, FILE *fp);
566 
567 NLM_EXTERN void TrimSeqGraph (SeqGraphPtr sgp, Int4 num_to_trim, Boolean from_left);
568 NLM_EXTERN void TrimQualityScores (BioseqPtr bsp, Int4 num_to_trim, Boolean from_left);
569 
570 NLM_EXTERN void ReverseSeqGraph (SeqGraphPtr sgp);
571 NLM_EXTERN void ReverseQualityScores (BioseqPtr bsp);
572 
573 
574 typedef void (*QualityWriteFunc) (CharPtr buf, Uint4 buflen, Pointer userdata); /* callback type accepted by PrintQualityScoresToBuffer, which also takes the userdata Pointer */
575 
576 NLM_EXTERN void PrintQualityScoresToBuffer (BioseqPtr bsp, Boolean gapIsZero, Pointer userdata, QualityWriteFunc callback);
577 
578 /* special function for genome contig delta sequences with far pointers */
579 
580 NLM_EXTERN void PrintQualityScoresForContig (BioseqPtr bsp, Boolean gapIsZero, FILE* fp);
581 
582 /* more efficient function for far genomic contig, makes separate graphs */
583 
584 NLM_EXTERN SeqAnnotPtr PhrapGraphForContig (BioseqPtr bsp);
585 
586 /* ReadContigList builds a far segmented bioseq from a table of accessions, starts, stops,
587 lengths, and (optional) strands.  Gaps of a given length (with 0 start and stop) are also
588 allowed. */
589 
590 NLM_EXTERN SeqEntryPtr ReadContigList (FILE *fp, Boolean coordinatesOnMaster);
591 NLM_EXTERN SeqEntryPtr ReadContigListEx (FILE *fp, Boolean coordinatesOnMaster, CharPtr seqid, CharPtr title);
592 
593 /* ReadAsnFastaOrFlatFile reads object manager-registered ASN.1, FASTA, GenBank, EMBL, GenPept,
594 Feature table, Restriction table, Contig table, Message response, or saved UID list, with the
595 option of saving FASTA results as OBJ_FASTA (SimpleSeq) to avoid ID collisions */
596 
597 NLM_EXTERN Pointer ReadAsnFastaOrFlatFileEx (FILE *fp, Uint2Ptr datatypeptr, Uint2Ptr entityIDptr,
598                                            Boolean forceNuc, Boolean forceProt,
599                                            Boolean parseFastaSeqId, Boolean fastaAsSimpleSeq,
600                                            BoolPtr chars_stripped);
601 NLM_EXTERN Pointer ReadAsnFastaOrFlatFile (FILE *fp, Uint2Ptr datatypeptr, Uint2Ptr entityIDptr,
602                                            Boolean forceNuc, Boolean forceProt,
603                                            Boolean parseFastaSeqId, Boolean fastaAsSimpleSeq);
604 
605 /* ReadFeatureTableFile only handles >Feature tables */
606 
607 NLM_EXTERN Pointer ReadFeatureTableFile (
608   FILE *fp,
609   Uint2Ptr datatypeptr,
610   Uint2Ptr entityIDptr,
611   Int4Ptr lineP,
612   BoolPtr failP,
613   Boolean ignore_web_comments
614 );
615 
616 NLM_EXTERN BioseqPtr GetBioseqReferencedByAnnot (
617   SeqAnnotPtr sap,
618   Uint2 entityID
619 );
620 
621 /* ReadDeltaFasta reads a FASTA file, combining raw sequence and >?unk100 lines into
622 a delta Bioseq.  The file pointer stops at the next FASTA with a real SeqID. */
623 
624 NLM_EXTERN BioseqPtr ReadDeltaFasta (FILE *fp, Uint2Ptr entityIDptr);
625 
626 /* This function is identical to ReadDeltaFasta, except that the contents of
627  * chars_stripped will be set to TRUE if characters other than digits were stripped from
628  * the sequence, or FALSE if not.
629  */
630 NLM_EXTERN BioseqPtr ReadDeltaFastaEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped);
631 NLM_EXTERN BioseqPtr ReadDeltaFastaExEx (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped, BoolPtr cache_failed);
632 
633 /* ReadDeltaFastaWithEmptyDefline reads just one delta sequence with an empty
634  * definition line.
635  * Calling function should make sure that fp is set to the start of the line
636  * with the empty definition line and that there is a "gap sequence ID"
637  * present as the next definition line in the file.
638  */
639 NLM_EXTERN BioseqPtr ReadDeltaFastaWithEmptyDefline (FILE *fp, Uint2Ptr entityIDptr, BoolPtr chars_stripped);
640 
641 /* PromoteXrefs expands generef or protref feature cross-references (made by reading a
642 feature table with ReadAsnFastaOrFlatFile) to stand-alone gene features or protein features
643 and protein bioseqs.  It processes ALL features in the list - you give it the FIRST sfp. */
644 
645 NLM_EXTERN void PromoteXrefs (
646   SeqFeatPtr sfp,
647   BioseqPtr bsp,
648   Uint2 entityID
649 );
650 NLM_EXTERN void PromoteXrefsEx (
651   SeqFeatPtr sfp,
652   BioseqPtr bsp,
653   Uint2 entityID,
654   Boolean include_stop,
655   Boolean remove_trailingX,
656   Boolean gen_prod_set
657 );
658 NLM_EXTERN void PromoteXrefsExEx (
659   SeqFeatPtr sfp,
660   BioseqPtr bsp,
661   Uint2 entityID,
662   Boolean include_stop,
663   Boolean remove_trailingX,
664   Boolean gen_prod_set,
665   Boolean force_local_id,
666   BoolPtr seq_fetch_failP
667 );
668 
669 /* SetEmptyGeneticCodes imposes genetic code on all coding regions within a feature table */
670 
671 NLM_EXTERN void SetEmptyGeneticCodes (SeqAnnotPtr sap, Int2 genCode);
672 
673 /* AddIntervalToLocation is a convenience function to add a single interval, and is called by
674 ReadAsnFastaOrFlatFile internally. */
675 
676 NLM_EXTERN SeqLocPtr AddIntervalToLocation (SeqLocPtr loc, SeqIdPtr sip, Int4 start,
677                                             Int4 stop, Boolean partial5, Boolean partial3);
678 
679 /* AddQualifierToFeature applies cds product and gene qualifiers as protref or generef stored
680 as feature xrefs.  Most others (e.g., protein_id) are stored as gbquals.  PromoteXrefs can then
681 turn these special cases into the appropriate structures in fully expanded records. */
682 
683 NLM_EXTERN void AddQualifierToFeature (SeqFeatPtr sfp, CharPtr qual, CharPtr val);
684 
685 /* specialized string trimming functions */
686 
687 NLM_EXTERN CharPtr TrimSpacesAndSemicolons (CharPtr str);
688 NLM_EXTERN CharPtr TrimSpacesAndJunkFromEnds (CharPtr str, Boolean allowEllipsis);
689 
690 /* specialized cleanup for subsource and orgmod lists */
691 NLM_EXTERN void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location);
692 NLM_EXTERN void CleanOrgModList (OrgModPtr PNTR ompp);
693 
694 /* used by original BankIt to merge multiple primer subsources */
695 NLM_EXTERN void CleanSubSourcePrimers (SubSourcePtr PNTR sspp);
696 
697 NLM_EXTERN Boolean PubIsEffectivelyEmpty (PubdescPtr pdp);
698 
699 /* extracts and reinserts descriptors in a standard order */
700 NLM_EXTERN void NormalizeDescriptorOrder (SeqEntryPtr sep);
701 
702 /* BasicSeqEntryCleanup cleans up strings, moves gbquals to the appropriate field, and
703 does several other conversions, all without changing the itemID structure (which would
704 require reindexing) */
705 
706 NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep);
707 
708 /* AdvancedSeqEntryCleanup also resynchronizes CDS, mRNA, and protein partials */
709 
710 NLM_EXTERN void AdvancedSeqEntryCleanup (SeqEntryPtr sep);
711 
712 /* cleanup for a single descriptor, after editing */
713 NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep);
714 
/* Selective components of BasicSeqEntryCleanup can be called for QA filtering */

/* NOTE(review): the Boolean switches (isEmblOrDdbj, isJscan, stripSerial, modernizeFeats)
   are described only by their names in this header; publist is passed by address.
   Confirm flag semantics and publist ownership against the definition. */
NLM_EXTERN void CleanUpSeqFeat (SeqFeatPtr sfp, Boolean isEmblOrDdbj, Boolean isJscan, Boolean stripSerial, Boolean modernizeFeats, ValNodePtr PNTR publist);

NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp);

/* NOTE(review): the Boolean result presumably reports whether the location was
   changed - confirm */
NLM_EXTERN Boolean FixWrongFuzzOnPlusStrand (SeqLocPtr location);
NLM_EXTERN Boolean FixWrongFuzzOnMinusStrand (SeqLocPtr location);

/* These two have the same parameter lists as VisitFeaturesFunc and VisitDescriptorsFunc
   (object pointer plus Pointer userdata), which this header declares for the Visit...
   explorers. */
NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (SeqDescrPtr sdp, Pointer userdata);

/* publication cleanup, split into author handling and the rest of the Pubdesc */
NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp);
NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial);

NLM_EXTERN void CleanStructuredComment (UserObjectPtr uop);

NLM_EXTERN void SortSeqEntryQualifiers (SeqEntryPtr sep);

NLM_EXTERN void CleanUpProteinTitles (SeqEntryPtr sep);
735 
/* BasicSeqAnnotCleanup is for cleaning up contents of separate named Seq-annot objects */

NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap);

/* Same parameter list as VisitFeaturesFunc (SeqFeatPtr plus Pointer userdata). */
NLM_EXTERN void RemoveUnnecessaryGeneXrefs (SeqFeatPtr sfp, Pointer userdata);

/* CautiousSeqEntryCleanup is a gradual consolidation and replacement of functions in SeriousSeqEntryCleanup,
which does change the itemID structure, and is intended to be safe for a retrofit of the ID database */

/* taxfun and taxmerge are caller-supplied SeqEntryFunc callbacks.
   NOTE(review): presumably taxonomy lookup / merge hooks, and possibly allowed to be
   NULL - confirm against the definition. */
NLM_EXTERN void CautiousSeqEntryCleanup (SeqEntryPtr sep, SeqEntryFunc taxfun, SeqEntryFunc taxmerge);
746 
/* Convert a segmented or delta Bioseq to a raw Bioseq */

NLM_EXTERN void SegOrDeltaBioseqToRaw (BioseqPtr bsp);

/* Seq-entry wide counterpart going the other way: segmented sets to delta sequences */
NLM_EXTERN void ConvertSegSetsToDeltaSequences (SeqEntryPtr sep);

/* NOTE(review): by name, TRUE for a delta sequence whose components point to far
   (non-local) sequences - confirm */
NLM_EXTERN Boolean IsDeltaSeqWithFarpointers (BioseqPtr bsp);

/* UserFieldSort is similar to ValNodeSort but for user fields within a user object */
/* compar receives two VoidPtr arguments and returns int; the UserFieldPtr result is
   what callers should use as the list from then on (NOTE(review): presumably the new
   head after sorting - confirm). */
NLM_EXTERN UserFieldPtr LIBCALL UserFieldSort (UserFieldPtr list, int (LIBCALLBACK *compar ) PROTO((VoidPtr, VoidPtr)));
757 
758 /* general purpose text finite state machine */
759 /* based on Practical Algorithms for Programmers by Binstock and Rex */
760 
/* Handle type for the text finite state machine.  struct TextFsa is only introduced
   as an incomplete type at this point, so clients use it through the pointer. */
typedef struct TextFsa *TextFsaPtr;
763 
/* TextFsa API.
   TextFsaNew   - returns a new table handle
   TextFsaAdd   - adds one word to the table
   TextFsaNext  - takes the current state and one character and returns an Int4
                  (NOTE(review): presumably the next state); matches is an out
                  parameter passed by address
   TextFsaFree  - takes a table and returns a TextFsaPtr
                  (NOTE(review): presumably NULL, for "p = TextFsaFree (p)" use - confirm)
   TextFsaGetStats - reports through the three Int4Ptr out parameters; Boolean result */
NLM_EXTERN TextFsaPtr TextFsaNew (void);
NLM_EXTERN void TextFsaAdd (TextFsaPtr tbl, CharPtr word);
NLM_EXTERN Int4 TextFsaNext (TextFsaPtr tbl, Int4 currState, Char ch, ValNodePtr PNTR matches);
NLM_EXTERN TextFsaPtr TextFsaFree (TextFsaPtr tbl);
NLM_EXTERN Boolean TextFsaGetStats (TextFsaPtr tbl, Int4Ptr highStateP, Int4Ptr numWordsP, Int4Ptr longestWordP);
769 
/* PCR_primer manipulation functions */

/* One primer set: forward/reverse sequence and forward/reverse name, mirroring the
   four string arguments of ParsePCRStrings. */
typedef struct pcrset {
  CharPtr  fwd_seq;
  CharPtr  rev_seq;
  CharPtr  fwd_name;
  CharPtr  rev_name;
  Int2     orig_order;  /* NOTE(review): presumably the original position, the key for
                           SortVnpByPCRSetOrder - confirm */
} PcrSet, PNTR PcrSetPtr;
779 
/* Parsing produces a ValNode list (NOTE(review): presumably of PcrSetPtr - confirm);
   WritePCRSet turns such a list into a SubSource chain and FreePCRSet releases it. */
NLM_EXTERN ValNodePtr ParsePCRSet (BioSourcePtr biop);
NLM_EXTERN ValNodePtr ParsePCRStrings (
  CharPtr fwd_primer_seq,
  CharPtr rev_primer_seq,
  CharPtr fwd_primer_name,
  CharPtr rev_primer_name
);
NLM_EXTERN SubSourcePtr WritePCRSet (ValNodePtr pset);
NLM_EXTERN ValNodePtr FreePCRSet (ValNodePtr pset);

/* comparison callbacks taking two VoidPtr arguments and returning int */
NLM_EXTERN int LIBCALLBACK SortVnpByPCRSetSeq (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN int LIBCALLBACK SortVnpByPCRSetOrder (VoidPtr ptr1, VoidPtr ptr2);

/* returns a ValNodePtr; callers should continue with the returned list */
NLM_EXTERN ValNodePtr UniqueVnpByPCRSetSeq (ValNodePtr pset);

NLM_EXTERN void ModernizePCRPrimers (
  BioSourcePtr biop
);
798 
799 /*
800    very simple explore functions - VisitOn only does one chain, VisitIn goes into set components,
801    they now return a count of the number of nodes visited, and the callback can be NULL if the purpose
802    is simply to count nodes
803 */
804 
805 typedef void (*VisitDescriptorsFunc) (SeqDescrPtr sdp, Pointer userdata);
806 NLM_EXTERN Int4 VisitDescriptorsOnBsp (BioseqPtr bsp, Pointer userdata, VisitDescriptorsFunc callback);
807 NLM_EXTERN Int4 VisitDescriptorsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback);
808 NLM_EXTERN Int4 VisitDescriptorsInSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback);
809 NLM_EXTERN Int4 VisitDescriptorsOnSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback);
810 NLM_EXTERN Int4 VisitDescriptorsInSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback);
811 
/* Feature explorers.  "On" variants handle one chain, "In" variants also go into set
   components; the Int4 result is the number of nodes visited and callback may be NULL
   when only that count is wanted. */
typedef void (*VisitFeaturesFunc) (SeqFeatPtr sfp, Pointer userdata);
NLM_EXTERN Int4 VisitFeaturesOnSap (SeqAnnotPtr sap, Pointer userdata, VisitFeaturesFunc callback);
NLM_EXTERN Int4 VisitFeaturesOnBsp (BioseqPtr bsp, Pointer userdata, VisitFeaturesFunc callback);
NLM_EXTERN Int4 VisitFeaturesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback);
NLM_EXTERN Int4 VisitFeaturesInSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback);
NLM_EXTERN Int4 VisitFeaturesOnSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback);
NLM_EXTERN Int4 VisitFeaturesInSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback);
819 
/* Alignment explorers.  "On" variants handle one chain, "In" variants also go into set
   components; the Int4 result is the number of nodes visited and callback may be NULL
   when only that count is wanted.
   Note that the callback's first parameter is a SeqAlignPtr even though it is named
   sap, the same name the explorer prototypes use for a SeqAnnotPtr. */
typedef void (*VisitAlignmentsFunc) (SeqAlignPtr sap, Pointer userdata);
NLM_EXTERN Int4 VisitAlignmentsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitAlignmentsFunc callback);
NLM_EXTERN Int4 VisitAlignmentsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAlignmentsFunc callback);
NLM_EXTERN Int4 VisitAlignmentsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback);
NLM_EXTERN Int4 VisitAlignmentsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback);
NLM_EXTERN Int4 VisitAlignmentsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback);
NLM_EXTERN Int4 VisitAlignmentsInSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback);
827 
/* Graph explorers.  "On" variants handle one chain, "In" variants also go into set
   components; the Int4 result is the number of nodes visited and callback may be NULL
   when only that count is wanted. */
typedef void (*VisitGraphsFunc) (SeqGraphPtr sgp, Pointer userdata);
NLM_EXTERN Int4 VisitGraphsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitGraphsFunc callback);
NLM_EXTERN Int4 VisitGraphsOnBsp (BioseqPtr bsp, Pointer userdata, VisitGraphsFunc callback);
NLM_EXTERN Int4 VisitGraphsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback);
NLM_EXTERN Int4 VisitGraphsInSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback);
NLM_EXTERN Int4 VisitGraphsOnSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback);
NLM_EXTERN Int4 VisitGraphsInSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback);
835 
/* Seq-annot explorers.  "On" variants handle one chain, "In" variants also go into set
   components; the Int4 result is the number of nodes visited and callback may be NULL
   when only that count is wanted. */
typedef void (*VisitAnnotsFunc) (SeqAnnotPtr sap, Pointer userdata);
NLM_EXTERN Int4 VisitAnnotsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAnnotsFunc callback);
NLM_EXTERN Int4 VisitAnnotsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback);
NLM_EXTERN Int4 VisitAnnotsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback);
NLM_EXTERN Int4 VisitAnnotsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback);
NLM_EXTERN Int4 VisitAnnotsInSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback);
842 
/* Bioseq explorers; only "In" variants exist.  The Int4 result is the number of nodes
   visited and callback may be NULL when only that count is wanted. */
typedef void (*VisitBioseqsFunc) (BioseqPtr bsp, Pointer userdata);
NLM_EXTERN Int4 VisitBioseqsInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioseqsFunc callback);
NLM_EXTERN Int4 VisitBioseqsInSep (SeqEntryPtr sep, Pointer userdata, VisitBioseqsFunc callback);
846 
/* VisitSequences allows you to limit visitation to nucs or prots that aren't parts, or just to parts */

/* Values for the filter argument of VisitSequencesInSet / VisitSequencesInSep.
   NOTE(review): VISIT_MAINS presumably covers both nucs and prots that are not
   parts - confirm against the definition. */
#define VISIT_MAINS 1
#define VISIT_NUCS  2
#define VISIT_PROTS 3
#define VISIT_PARTS 4

/* The callback has the same parameter list as VisitBioseqsFunc; filter takes one of
   the VISIT_ values.  Both explorers return an Int4 count. */
typedef void (*VisitSequencesFunc) (BioseqPtr bsp, Pointer userdata);
NLM_EXTERN Int4 VisitSequencesInSet (BioseqSetPtr bssp, Pointer userdata, Int2 filter, VisitSequencesFunc callback);
NLM_EXTERN Int4 VisitSequencesInSep (SeqEntryPtr sep, Pointer userdata, Int2 filter, VisitSequencesFunc callback);
857 
/* Bioseq-set explorers: the callback is handed a BioseqSetPtr plus the caller's
   userdata; both explorers return an Int4 count. */
typedef void (*VisitSetsFunc) (BioseqSetPtr bssp, Pointer userdata);
NLM_EXTERN Int4 VisitSetsInSep (SeqEntryPtr sep, Pointer userdata, VisitSetsFunc callback);
NLM_EXTERN Int4 VisitSetsInSet (BioseqSetPtr bssp, Pointer userdata, VisitSetsFunc callback);
861 
/* visits components of pop/phy/mut/genbank sets, callback is at most nuc-prot set, can then call above functions */

/* The callback is given each component as a SeqEntryPtr, so it can pass that entry
   straight to the ...InSep / ...OnSep explorers.  Returns an Int4 count. */
typedef void (*VisitElementsFunc) (SeqEntryPtr sep, Pointer userdata);
NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElementsFunc callback);
866 
/* visits all SeqIds within a SeqLoc, or within features, alignments, graphs, or annots */

/* One explorer per container type; each takes the container, the caller's userdata
   and the callback, and returns an Int4 count.  A Bioseq variant is declared too. */
typedef void (*VisitSeqIdFunc) (SeqIdPtr sip, Pointer userdata);
NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqIdFunc callback);

NLM_EXTERN Int4 VisitSeqIdsInBioseq (BioseqPtr bsp, Pointer userdata, VisitSeqIdFunc callback);
NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback);
NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback);
NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback);
NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback);
877 
/* visits all sub UserFields - if the data type is 11, VisitUserFieldsInUfp recurses */

/* Two starting points: a user field (Ufp) or a whole user object (Uop).  Both return
   an Int4 count. */
typedef void (*VisitUserFieldsFunc) (UserFieldPtr ufp, Pointer userdata);
NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback);
NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, VisitUserFieldsFunc callback);
883 
/* visits all sub UserObjects if the data type is 12 - needed to pack multiple user objects on a single feature.  Does not visit user objects which contain other user objects. */

/* Both variants share the callback type and return an Int4 count; they differ only in
   whether container user objects themselves are reported. */
typedef void (*VisitUserObjectFunc) (UserObjectPtr uop, Pointer userdata);
NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback);
/* Visits all user objects, even if they contain other user objects */
NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback);
890 
/* explores sub UserObjects including "CombinedFeatureUserObjects" and finds by label  */

/* top is where the search starts, tag is the label looked for.
   NOTE(review): presumably returns NULL when nothing matches - confirm */
NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag);

/* creates "CombinedFeatureUserObjects" sfp->ext to combine two user objects */

/* NOTE(review): ownership of origuop/newuop after the call is not stated here; per the
   comment above the result is meant to be stored as sfp->ext - confirm */
NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop);
898 
/* visits all publication descriptors or features */

/* Same On/In family layout as the descriptor explorers; the callback gets the Pubdesc
   itself rather than the descriptor or feature that carries it.  Each returns an Int4
   count. */
typedef void (*VisitPubdescsFunc) (PubdescPtr pdp, Pointer userdata);
NLM_EXTERN Int4 VisitPubdescsOnBsp (BioseqPtr bsp, Pointer userdata, VisitPubdescsFunc callback);
NLM_EXTERN Int4 VisitPubdescsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback);
NLM_EXTERN Int4 VisitPubdescsInSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback);
NLM_EXTERN Int4 VisitPubdescsOnSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback);
NLM_EXTERN Int4 VisitPubdescsInSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback);
907 
/* visits all authors in a publication */

/* The callback is handed a NameStdPtr for each author.
   NOTE(review): authors not stored as a standard name are presumably not reported,
   since the callback type only carries NameStdPtr - confirm */
typedef void (*VisitAuthorFunc) (NameStdPtr nsp, Pointer userdata);
NLM_EXTERN Int4 VisitAuthorsInPub (PubdescPtr pdp, Pointer userdata, VisitAuthorFunc callback);
912 
/* visits all biosource descriptors or features */

/* Same On/In family layout as the descriptor explorers; the callback gets the BioSource
   itself rather than the descriptor or feature that carries it.  Each returns an Int4
   count. */
typedef void (*VisitBioSourcesFunc) (BioSourcePtr biop, Pointer userdata);
NLM_EXTERN Int4 VisitBioSourcesOnBsp (BioseqPtr bsp, Pointer userdata, VisitBioSourcesFunc callback);
NLM_EXTERN Int4 VisitBioSourcesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback);
NLM_EXTERN Int4 VisitBioSourcesInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback);
NLM_EXTERN Int4 VisitBioSourcesOnSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback);
NLM_EXTERN Int4 VisitBioSourcesInSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback);
921 
/* function to scan binary ASN.1 file of entire release as Bioseq-set, simple explore from successive top seps */
/* compressed can be TRUE only on UNIX, where it does a popen on zcat to decompress on-the-fly */
/* although it now returns a count of components visited, the callback cannot be NULL for this function */

/* inputFile  - path of the release file
   binary     - selects binary versus text ASN.1 input
   compressed - see the UNIX-only restriction above
   userdata   - handed to callback unchanged
   callback   - required; called with each top-level Seq-entry */
typedef void (*ScanBioseqSetFunc) (SeqEntryPtr sep, Pointer userdata);
NLM_EXTERN Int4 ScanBioseqSetRelease (
  CharPtr inputFile,
  Boolean binary,
  Boolean compressed,
  Pointer userdata,
  ScanBioseqSetFunc callback
);

/* multi-thread safe version does not free SeqEntryPtr after calling callback, use FreeScanSeqEntryMT */
NLM_EXTERN Int4 ScanBioseqSetReleaseMT (
  CharPtr inputFile,
  Boolean binary,
  Boolean compressed,
  Pointer userdata,
  ScanBioseqSetFunc callback
);
/* releases a Seq-entry that ScanBioseqSetReleaseMT left for the callback's owner to free */
NLM_EXTERN SeqEntryPtr LIBCALL FreeScanSeqEntryMT (
  SeqEntryPtr sep
);
946 
/* More automatic version of ReadAsnFastaOrFlatFile, can read BioseqSet release file */

/* Takes the same five arguments, in the same order, as ScanBioseqSetRelease and
   reuses its callback type; returns an Int4 count. */
NLM_EXTERN Int4 ReadSequenceAsnFile (
  CharPtr inputFile,
  Boolean binary,
  Boolean compressed,
  Pointer userdata,
  ScanBioseqSetFunc callback
);
956 
/* function to scan binary ASN.1 file of entrezgene release as Entrezgene-Set */

/* Argument list parallels ScanBioseqSetRelease; the callback is handed an
   EntrezgenePtr for each record instead of a SeqEntryPtr. */
typedef void (*ScanEntrezgeneSetFunc) (EntrezgenePtr egp, Pointer userdata);
NLM_EXTERN Int4 ScanEntrezgeneSetRelease (
  CharPtr inputFile,
  Boolean binary,
  Boolean compressed,
  Pointer userdata,
  ScanEntrezgeneSetFunc callback
);
967 
/* PubMed registered fetch functionality */

/* NOTE(review): presumably forwards to the function installed with PubMedSetFetchFunc
   and yields NULL when none is registered - confirm */
NLM_EXTERN PubmedEntryPtr LIBCALL GetPubMedForUid (Int4 uid);

/* internal support type, registration function */

/* a fetch function maps a uid to a PubmedEntryPtr */
typedef PubmedEntryPtr (LIBCALLBACK * PubMedFetchFunc) (Int4 uid);

NLM_EXTERN void LIBCALL PubMedSetFetchFunc (PubMedFetchFunc func);
977 
/* first is the input name, inits the caller-supplied output buffer, maxsize its size
   (roles read off the parameter names) */
NLM_EXTERN void FirstNameToInitials (CharPtr first, CharPtr inits, size_t maxsize);

/* Line reader working on fp with a ValNode list passed by address.
   NOTE(review): current_data looks like the same buffered-read list kept in
   ReadBufferData.current_data - confirm; ownership of the returned string is not
   stated here. */
extern CharPtr MyFGetLine (FILE *fp, ValNodePtr PNTR current_data);
981 
/* Read and error-report callbacks.  The two branches declare the same pair of
   functions; the WIN32 branch only adds the __stdcall calling convention.
   NOTE(review): userdata is presumably a ReadBufferPtr - confirm */
#if defined (WIN32)
extern char * __stdcall AbstractReadFunction (Pointer userdata);
extern void __stdcall AbstractReportError (TErrorInfoPtr err_ptr, Pointer userdata);
#else
extern char * AbstractReadFunction (Pointer userdata);
extern void AbstractReportError (TErrorInfoPtr err_ptr, Pointer userdata);
#endif
989 
/* Pairs an open file with a ValNode list of data associated with the read
   (same two pieces MyFGetLine takes as arguments). */
typedef struct readbuffer {
  FILE *fp;
  ValNodePtr current_data;
} ReadBufferData, PNTR ReadBufferPtr;

/* frees a list such as ReadBufferData.current_data (by name; confirm in definition) */
extern void FreeBufferedReadList (ValNodePtr vnp);
996 
/* Alignment-file import helpers.  moltype is a Uint1 molecule type code. */
extern CharPtr AlignmentStringToSequenceString (CharPtr aln_str, Uint1 moltype);
extern SeqEntryPtr MakeSequinDataFromAlignment (TAlignmentFilePtr afp, Uint1 moltype);
/* Ex variant adds the check_ids switch */
extern SeqEntryPtr MakeSequinDataFromAlignmentEx (TAlignmentFilePtr afp, Uint1 moltype, Boolean check_ids);
extern SeqEntryPtr make_seqentry_for_seqentry (SeqEntryPtr sep);
/* Pseudo CDS to misc_feature conversion: single feature, single feature with control
   over the product (remove_product), or everything under an entityID. */
extern Boolean ConvertOnePseudoCDSToMiscFeat (SeqFeatPtr sfp);
NLM_EXTERN Boolean ConvertOnePseudoCDSToMiscFeatEx (SeqFeatPtr sfp, Boolean remove_product);
extern void ConvertPseudoCDSToMiscFeatsForEntityID (Uint2 entityID);
1004 
/* Lookup and editing of alignments that involve a given Bioseq or Seq-id. */
extern SeqAlignPtr FindAlignmentsForBioseq (BioseqPtr bsp);
extern ValNodePtr FindAlignSeqAnnotsForBioseq (BioseqPtr bsp);
extern Boolean IsSequenceFirstInPairwise (SeqEntryPtr sep, SeqIdPtr sip);
/* returns a SeqAlignPtr; callers should continue with the returned chain rather
   than salphead */
extern SeqAlignPtr RemoveOneSequenceFromAlignment (SeqIdPtr sip, SeqAlignPtr salphead);
extern Boolean RemoveSequenceFromAlignments (SeqEntryPtr sep, SeqIdPtr sip);
/* FASTA reader.  chars_stripped and lastchar are out parameters passed by pointer.
   NOTE(review): forceNuc / forceProt presumably override molecule type detection,
   and the out parameters may be allowed to be NULL - confirm */
extern BioseqPtr ReadFastaOnly (FILE *fp,
                              Boolean forceNuc, Boolean forceProt,
                              BoolPtr chars_stripped,
                              CharPtr lastchar);
extern void MergeFeatureIntervalsToParts (SeqFeatPtr sfp, Boolean ordered);

/* Same parameter list as VisitBioseqsFunc / VisitSequencesFunc. */
extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata);
1017 
/* State of a log file that is handed to the autofix functions. */
typedef struct loginfo
{
  FILE *fp;               /* stream of the log file */
  Boolean data_in_log;    /* NOTE(review): presumably set once something has been written - confirm */
  CharPtr display_title;  /* title, as given to OpenLog */
  Char path[PATH_MAX];    /* fixed-size buffer for the log file path */
} LogInfoData, PNTR LogInfoPtr;

/* OpenLog creates a log for the given title; FreeLog takes a log and returns a
   LogInfoPtr (NOTE(review): presumably NULL after closing/freeing - confirm). */
extern LogInfoPtr OpenLog (CharPtr display_title);
extern LogInfoPtr FreeLog (LogInfoPtr lip);

/* Same parameter list as AutofixCallback (item list, data pointer, log). */
NLM_EXTERN void FixNonWGSSets (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
1030 
/* structures and functions for the Discrepancy Report */
/* Callback types stored in ClickableItemData:
   ClickableCallback         - receives the item list and the item's callback data
   ClickableCallbackDataFree - releases that callback data
   AutofixCallback           - receives the item list, a data pointer and the log;
                               the Fix... / Mark... autofix functions declared in this
                               header have this parameter list */
typedef void (*ClickableCallback) (ValNodePtr item_list, Pointer userdata);
typedef void (*ClickableCallbackDataFree) (Pointer userdata);
typedef void (*AutofixCallback) (ValNodePtr item_list, Pointer userdata, LogInfoPtr lip);
1035 
/* One entry of a discrepancy report: a description, the objects it refers to,
   optional callbacks, and optional nested subcategories. */
typedef struct clickableitem
{
  Uint4                     clickable_item_type;  /* filled from the DiscrepancyType enum */
  CharPtr                   description;
  ValNodePtr                item_list;      /* objects this entry refers to */
  ClickableCallback         callback_func;
  ClickableCallbackDataFree datafree_func;  /* releases callback_data */
  Pointer                   callback_data;
  Boolean                   chosen;         /* see AnyDiscrepanciesChosen / ChooseAllDiscrepancies */
  ValNodePtr                subcategories;  /* NOTE(review): presumably a list of nested ClickableItemPtr - confirm */
  Boolean                   expanded;       /* see ExpandClickableItemList / ContractClickableItemList */
  Int4                      level;          /* see SetDiscrepancyLevels */
  AutofixCallback           autofix_func;  /* note - autofix functions can be set for an
                                            * entire category or for an individual clickable
                                            * item.  Don't set autofix functions in both
                                            * places or they will both be called.
                                            */
  Pointer                   autofix_data;  /* data for item-specific autofixes */
} ClickableItemData, PNTR ClickableItemPtr;
1055 
/* Constructors.  The first takes a description_fmt plus an item list, the second a
   plain description and no list.
   NOTE(review): description_fmt is presumably a printf-style format filled in from
   the item list (e.g. its length) - confirm before passing arbitrary text. */
extern ClickableItemPtr
NewClickableItem
(Uint4           clickable_item_type,
 CharPtr         description_fmt,
 ValNodePtr      item_list);

extern ClickableItemPtr
NewClickableItemNoList
(Uint4           clickable_item_type,
 CharPtr         description);

/* Free / copy helpers.  The Free functions return a pointer of the type they take
   (NOTE(review): presumably NULL - confirm). */
extern ValNodePtr ClickableItemObjectListFree (ValNodePtr vnp);
extern ValNodePtr ClickableItemObjectListCopy (ValNodePtr orig);
extern ClickableItemPtr ClickableItemFree (ClickableItemPtr cip);
extern ValNodePtr FreeClickableList (ValNodePtr list);
/* work on the chosen flag of the items in cip_list */
extern Boolean AnyDiscrepanciesChosen (ValNodePtr cip_list);
NLM_EXTERN void ChooseAllDiscrepancies (ValNodePtr cip_list);

/* comparison callbacks taking two VoidPtr arguments and returning int */
extern int LIBCALLBACK SortVnpByClickableItemDescription (VoidPtr ptr1, VoidPtr ptr2);
NLM_EXTERN int LIBCALLBACK SortVnpByClickableItemChosen (VoidPtr ptr1, VoidPtr ptr2);

/* work on the expanded flag of the items in the list */
extern void ExpandClickableItemList (ValNodePtr vnp);
extern void ContractClickableItemList (ValNodePtr vnp);

/* list head is passed by address, so the head itself may be replaced */
NLM_EXTERN void RemoveDuplicateItems (ValNodePtr PNTR item_list);
1081 
/* To add a new type of test, do ALL of the following:
1083  * 1. add an item to the DiscrepancyType enum (this will fill the clickable_item_type value)
1084  * 2. add a collection function and declare it with the others
1085  * 3. add an item to discrepancy_info_list that corresponds with the position of the
1086  *    new enum value.  If you are combining multiple types in one collection function,
1087  *    be sure to list them together.
1088  */
1089 
/* SHOW_TRANSL_EXCEPT added by J. Chen */
/* SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME added, J. Chen */
/* TEST_DEFLINE_EXIST added, J. Chen (the enumerator below is spelled TEST_DEFLINE_PRESENT) */

/* Identifiers of the individual discrepancy tests.
 * - Values are assigned by position, starting at DISC_GENE_MISSING = 0; no other
 *   enumerator has an explicit value, so inserting or reordering entries renumbers
 *   everything after the change.
 * - MAX_DISC_TYPE must stay last: it is the number of tests and is used as the size
 *   of DiscrepancyConfigData.conf_list.
 * - The order has to stay in step with discrepancy_info_list (see the instructions
 *   for adding a new test).
 */
typedef enum {
  DISC_GENE_MISSING = 0,
  DISC_SUPERFLUOUS_GENE,
  DISC_GENE_MISSING_LOCUS_TAG,
  DISC_GENE_DUPLICATE_LOCUS_TAG,
  DISC_GENE_LOCUS_TAG_BAD_FORMAT,
  DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX,
  DISC_NON_GENE_LOCUS_TAG,
  DISC_COUNT_NUCLEOTIDES,
  DISC_MISSING_PROTEIN_ID,
  DISC_INCONSISTENT_PROTEIN_ID_PREFIX,
  DISC_GENE_CDS_mRNA_LOCATION_CONFLICT,
  DISC_GENE_PRODUCT_CONFLICT,
  DISC_GENE_DUPLICATE_LOCUS,
  DISC_EC_NUMBER_NOTE,
  DISC_PSEUDO_MISMATCH,
  DISC_JOINED_FEATURES,
  DISC_OVERLAPPING_GENES,
  DISC_OVERLAPPING_CDS,
  DISC_CONTAINED_CDS,
  DISC_RNA_CDS_OVERLAP,
  DISC_SHORT_CONTIG,
  DISC_INCONSISTENT_BIOSRC,
  DISC_SUSPECT_PRODUCT_NAME,
  DISC_PRODUCT_NAME_TYPO,
  DISC_PRODUCT_NAME_QUICKFIX,
  DISC_INCONSISTENT_BIOSRC_DEFLINE,
  DISC_PARTIAL_CDS_IN_COMPLETE_SEQUENCE,
  DISC_EC_NUMBER_ON_HYPOTHETICAL_PROTEIN,
  DISC_NO_TAXLOOKUP,
  DISC_BAD_TAXLOOKUP,
  DISC_SHORT_SEQUENCE,
  DISC_SUSPECT_PHRASES,
  DISC_SUSPICIOUS_NOTE_TEXT,
  DISC_COUNT_TRNA,
  DISC_DUP_TRNA,
  DISC_BADLEN_TRNA,
  DISC_STRAND_TRNA,
  DISC_COUNT_RRNA,
  DISC_DUP_RRNA,
  DISC_RNA_NO_PRODUCT,
  DISC_TRANSL_NO_NOTE,
  DISC_NOTE_NO_TRANSL,
  DISC_TRANSL_TOO_LONG,
  DISC_CDS_OVERLAP_TRNA,
  DISC_COUNT_PROTEINS,
  DISC_FEAT_OVERLAP_SRCFEAT,
  DISC_MISSING_GENPRODSET_PROTEIN,
  DISC_DUP_GENPRODSET_PROTEIN,
  DISC_MISSING_GENPRODSET_TRANSCRIPT_ID,
  DISC_DUP_GENPRODSET_TRANSCRIPT_ID,
  DISC_PERCENTN,
  DISC_N_RUNS,
  DISC_ZERO_BASECOUNT,
  DISC_ADJACENT_PSEUDOGENE,
  DISC_LONG_NO_ANNOTATION,
  DISC_NO_ANNOTATION,
  DISC_INFLUENZA_DATE_MISMATCH,
  DISC_SHORT_INTRON,
  DISC_MISSING_VIRAL_QUALS,
  DISC_SRC_QUAL_PROBLEM,
  DISC_MISSING_SRC_QUAL,
  DISC_DUP_SRC_QUAL,
  DISC_DUP_SRC_QUAL_DATA,
  DISC_HAPLOTYPE_MISMATCH,
  DISC_FEATURE_MOLTYPE_MISMATCH,
  DISC_CDS_WITHOUT_MRNA,
  DISC_EXON_INTRON_CONFLICT,
  DISC_FEATURE_COUNT,
  DISC_SPECVOUCHER_TAXNAME_MISMATCH,
  DISC_GENE_PARTIAL_CONFLICT,
  DISC_FLATFILE_FIND_ONCALLER,
  DISC_FLATFILE_FIND_ONCALLER_FIXABLE,
  DISC_FLATFILE_FIND_ONCALLER_UNFIXABLE,
  DISC_CDS_PRODUCT_FIND,
  DISC_DUP_DEFLINE,
  DUP_DISC_ATCC_CULTURE_CONFLICT,
  DISC_USA_STATE,
  DISC_INCONSISTENT_MOLTYPES,
  DISC_SUBMITBLOCK_CONFLICT,
  DISC_POSSIBLE_LINKER,
  DISC_TITLE_AUTHOR_CONFLICT,
  DISC_BAD_GENE_STRAND,
  DISC_MAP_CHROMOSOME_CONFLICT,
  DISC_RBS_WITHOUT_GENE,
  DISC_CITSUBAFFIL_CONFLICT,
  DISC_REQUIRED_CLONE,
  DISC_SOURCE_QUALS_ASNDISC,
  DISC_mRNA_ON_WRONG_SEQUENCE_TYPE,
  DISC_RETROVIRIDAE_DNA,
  DISC_CHECK_AUTH_CAPS,
  DISC_CHECK_RNA_PRODUCTS_AND_COMMENTS,
  DISC_MICROSATELLITE_REPEAT_TYPE,
  DISC_MITOCHONDRION_REQUIRED,
  DISC_UNPUB_PUB_WITHOUT_TITLE,
  DISC_QUALITY_SCORES,
  DISC_INTERNAL_TRANSCRIBED_SPACER_RRNA,
  DISC_PARTIAL_PROBLEMS,
  DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS,
  DISC_BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION,
  DISC_SUSPECT_RRNA_PRODUCTS,
  DISC_SUSPECT_MISC_FEATURES,
  DISC_BACTERIA_MISSING_STRAIN,
  DISC_MISSING_DEFLINES,
  DISC_MISSING_AFFIL,
  DISC_BACTERIA_SHOULD_NOT_HAVE_ISOLATE,
  DISC_BACTERIA_SHOULD_NOT_HAVE_MRNA,
  DISC_CDS_HAS_NEW_EXCEPTION,
  DISC_TRINOMIAL_SHOULD_HAVE_QUALIFIER,
  DISC_METAGENOMIC,
  DISC_METAGENOME_SOURCE,
  ONCALLER_GENE_MISSING,
  ONCALLER_SUPERFLUOUS_GENE,
  DISC_SHORT_RRNA,
  ONCALLER_CHECK_AUTHORITY,
  ONCALLER_CONSORTIUM,
  ONCALLER_STRAIN_CULTURE_COLLECTION_MISMATCH,
  ONCALLER_MULTISRC,
  ONCALLER_MULTIPLE_CULTURE_COLLECTION,
  DISC_SEGSETS_PRESENT,
  DISC_NONWGS_SETS_PRESENT,
  DISC_FEATURE_LIST,
  DISC_CATEGORY_HEADER,
  DISC_MISMATCHED_COMMENTS,
  DISC_STRAIN_TAXNAME_MISMATCH,
  DISC_HUMAN_HOST,
  DISC_BAD_BACTERIAL_GENE_NAME,
  TEST_BAD_GENE_NAME,
  ONCALLER_ORDERED_LOCATION,
  ONCALLER_COMMENT_PRESENT,
  ONCALLER_DEFLINE_ON_SET,
  ONCALLER_HIV_RNA_INCONSISTENT,
  SHORT_PROT_SEQUENCES,
  TEST_EXON_ON_MRNA,
  TEST_HAS_PROJECT_ID,
  ONCALLER_HAS_STANDARD_NAME,
  ONCALLER_MISSING_STRUCTURED_COMMENTS,
  DISC_REQUIRED_STRAIN,
  MISSING_GENOMEASSEMBLY_COMMENTS,
  DISC_BACTERIAL_TAX_STRAIN_MISMATCH,
  TEST_CDS_HAS_CDD_XREF,
  TEST_UNUSUAL_NT,
  TEST_LOW_QUALITY_REGION,
  TEST_ORGANELLE_NOT_GENOMIC,
  TEST_UNWANTED_SPACER,
  TEST_ORGANELLE_PRODUCTS,
  TEST_SP_NOT_UNCULTURED,
  TEST_BAD_MRNA_QUAL,
  TEST_UNNECESSARY_ENVIRONMENTAL,
  TEST_UNNECESSARY_VIRUS_GENE,
  TEST_UNWANTED_SET_WRAPPER,
  TEST_MISSING_PRIMER,
  TEST_UNUSUAL_MISC_RNA,
  TEST_AMPLIFIED_PRIMERS_NO_ENVIRONMENTAL_SAMPLE,
  TEST_DUP_GENES_OPPOSITE_STRANDS,
  TEST_SMALL_GENOME_SET_PROBLEM,
  TEST_OVERLAPPING_RRNAS,
  TEST_MRNA_SEQUENCE_MINUS_STRAND_FEATURES,
  TEST_TAXNAME_NOT_IN_DEFLINE,
  TEST_COUNT_UNVERIFIED,
  SHOW_TRANSL_EXCEPT,
  SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME,
  TEST_DEFLINE_PRESENT,
  TEST_MRNA_OVERLAPPING_PSEUDO_GENE,
  FIND_OVERLAPPED_GENES,
  DISC_BIOMATERIAL_TAXNAME_MISMATCH,
  DISC_CULTURE_TAXNAME_MISMATCH,
  DISC_CHECK_AUTH_NAME,
  NON_RETROVIRIDAE_PROVIRAL,
  RNA_PROVIRAL,
  SHORT_SEQUENCES_200,
  DISC_10_PERCENTN,
  N_RUNS_14,
  MOLTYPE_NOT_MRNA,
  TECHNIQUE_NOT_TSA,
  MISSING_STRUCTURED_COMMENT,
  MISSING_PROJECT,
  MULTIPLE_CDS_ON_MRNA,
  DUP_DISC_CBS_CULTURE_CONFLICT,
  DIVISION_CODE_CONFLICTS,
  RRNA_NAME_CONFLICTS,
  EUKARYOTE_SHOULD_HAVE_MRNA,
  MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS,
  ONCALLER_COUNTRY_COLON,
  ONCALLER_BIOPROJECT_ID,
  ONCALLER_STRAIN_TAXNAME_CONFLICT,
  ONCALLER_MORE_NAMES_COLLECTED_BY,
  ONCALLER_MORE_OR_SPEC_NAMES_IDENTIFIED_BY,
  ONCALLER_SUSPECTED_ORG_IDENTIFIED,
  ONCALLER_SUSPECTED_ORG_COLLECTED,
  ONCALLER_SWITCH_STRUCTURED_COMMENT_PREFIX,
  ONCALLER_CITSUB_AFFIL_DUP_TEXT,
  ONCALLER_DUPLICATE_PRIMER_SET,
  END_COLON_IN_COUNTRY,
  DISC_PROTEIN_NAMES,
  DISC_TITLE_ENDS_WITH_SEQUENCE,
  DISC_INCONSISTENT_STRUCTURED_COMMENTS,
  DISC_INCONSISTENT_DBLINK,
  DISC_INCONSISTENT_MOLINFO_TECH,
  DISC_GAPS,
  DISC_BAD_BGPIPE_QUALS,
  TEST_SHORT_LNCRNA,
  TEST_TERMINAL_NS,
  TEST_ALIGNMENT_HAS_SCORE,
  UNCULTURED_NOTES_ONCALLER,
  SEQ_ID_PHRASES,
  NO_PRODUCT_STRING,
  MAX_DISC_TYPE
} DiscrepancyType;
1302 
/* Kinds of report a discrepancy configuration can be set up for (see
   ConfigureForReportType and IsTestTypeAppropriateForReportType).  Numbering starts
   at 1; eReportType_End is the last enumerator, one past the last real report type. */
typedef enum {
  eReportTypeDiscrepancy = 1,
  eReportTypeOnCaller    = 2,
  eReportTypeMegaReport  = 3,
  eReportTypeTSA         = 4,
  eReportType_End        = 5
} EDiscrepancyReportType;
1310 
/* test_type is passed as Int4 (NOTE(review): presumably a DiscrepancyType value - confirm) */
extern Boolean IsTestTypeAppropriateForReportType (Int4 test_type, EDiscrepancyReportType report_type);

/* writes the list of tests to fp */
extern void PrintDiscrepancyTestList (FILE *fp);

/* NOTE(review): presumably stores level in ClickableItemData.level for the items of
   the list - confirm whether subcategories are handled too */
extern void SetDiscrepancyLevels (ValNodePtr discrepancy_list, Int4 level);

/* Name lookups in both directions, plus a query for autofix support.
   NOTE(review): what GetDiscrepancyTypeFromSettingName returns for an unknown name is
   not stated here (MAX_DISC_TYPE would be the natural sentinel) - confirm */
extern CharPtr GetDiscrepancyTestConfName (DiscrepancyType dtype);
extern CharPtr GetDiscrepancyTestSettingName (DiscrepancyType dtype);
extern DiscrepancyType GetDiscrepancyTypeFromSettingName (CharPtr setting_name);
extern Boolean DiscrepancyTestHasAutofix (DiscrepancyType dtype);
1321 
/* Settings for a discrepancy report run. */
typedef struct discrepancyconfig
{
  Boolean conf_list[MAX_DISC_TYPE];  /* one Boolean slot per DiscrepancyType value */
  Boolean use_feature_table_format;
  Boolean use_big_test_set;
  Boolean is_big_sequence;           /* see ConfigureForBigSequence */
} DiscrepancyConfigData, PNTR DiscrepancyConfigPtr;
1329 
/* Lifetime: New / Copy create a configuration, Free takes one and returns a
   DiscrepancyConfigPtr (NOTE(review): presumably NULL - confirm). */
extern DiscrepancyConfigPtr DiscrepancyConfigFree (DiscrepancyConfigPtr dcp);
extern DiscrepancyConfigPtr DiscrepancyConfigNew (void);
extern DiscrepancyConfigPtr DiscrepancyConfigCopy (DiscrepancyConfigPtr dcp);
/* Persistence: the Ex variants additionally take a report / config name. */
extern DiscrepancyConfigPtr ReadDiscrepancyConfig (void);
extern DiscrepancyConfigPtr ReadDiscrepancyConfigEx (CharPtr report_config_name);
extern void SaveDiscrepancyConfig (DiscrepancyConfigPtr dcp);
extern void SaveDiscrepancyConfigEx (DiscrepancyConfigPtr dcp, CharPtr report_name);
/* Adjusters that modify an existing configuration. */
extern void DisableTRNATests (DiscrepancyConfigPtr dcp);
/* NOTE(review): the CharPtr result is presumably an error message (NULL on success),
   and its ownership is not stated here - confirm */
extern CharPtr SetDiscrepancyReportTestsFromString (CharPtr list, Boolean enable, DiscrepancyConfigPtr dcp);
extern void ConfigureForBigSequence (DiscrepancyConfigPtr dcp);
extern void ConfigureForGenomes (DiscrepancyConfigPtr dcp);
extern void ConfigureForReportType (DiscrepancyConfigPtr dcp, EDiscrepancyReportType report_type);
1342 
/* Signature of a collection function: (address of the discrepancy list, list of
   Seq-entries).  The individual test functions declared in this header, e.g.
   FindShortIntrons, have this parameter list. */
typedef void (*PerformDiscrepancyTest) PROTO ((ValNodePtr PNTR, ValNodePtr));

/* taxlookup is a caller-supplied collection function (NOTE(review): presumably
   optional, for builds without taxonomy access - confirm) */
extern ValNodePtr CollectDiscrepancies (DiscrepancyConfigPtr dcp, ValNodePtr sep_list, PerformDiscrepancyTest taxlookup);
/* NOTE(review): fix_all presumably means "ignore the chosen flags" - confirm */
extern void AutofixDiscrepancies (ValNodePtr vnp, Boolean fix_all, LogInfoPtr lip);
extern void ChooseFixableDiscrepancies (ValNodePtr vnp);
/* Text rendering of report items; ownership of the returned strings is not stated here. */
extern CharPtr GetDiscrepancyItemText (ValNodePtr vnp);
extern CharPtr GetDiscrepancyItemTextEx (ValNodePtr vnp, CharPtr filename);
/* unlike the other Visit functions in this header this one returns void, not a count */
extern void VisitGenProdSetFeatures (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback);
extern ValNodePtr ReplaceDiscrepancyItemWithFeatureTableStrings (ValNodePtr feat_list);
extern CharPtr GetParentLabelForDiscrepancyItem (ValNodePtr vnp);
/* Writers; the Ex variant exposes the additional formatting switches. */
extern void WriteDiscrepancy (FILE *fp, ClickableItemPtr dip, Boolean use_feature_table_fmt);
extern void WriteDiscrepancyEx (FILE *fp, ClickableItemPtr dip, Boolean use_feature_table_fmt, Boolean cmd_line, CharPtr descr_prefix, Boolean list_features_if_subcat);
/* comparison callbacks taking two VoidPtr arguments and returning int */
extern int LIBCALLBACK SortVnpByDiscrepancyDescription (VoidPtr ptr1, VoidPtr ptr2);
extern int LIBCALLBACK SortVnpByDiscrepancyItemText (VoidPtr ptr1, VoidPtr ptr2);
/* list head is passed by address so that it can be updated by the reversal */
extern void ValNodeReverse (ValNodePtr PNTR list);
1358 
/* Individual discrepancy test function declarations */
/* Text constants defined elsewhere; by their names, the note text and a format string
   used for overlapping coding regions. */
extern const CharPtr kOverlappingCDSNoteText;
extern const CharPtr kOverlappingCDSNeedsNoteFmt;
/* Every function below takes the address of the discrepancy list and the list of
   Seq-entries to examine, i.e. the PerformDiscrepancyTest parameter list; the Ex
   variants add one Boolean switch. */
extern void AddOverlappingCodingRegionDiscrepancies (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTagsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean exclude_dirsub);
extern void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void FindShortIntronsEx (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list, Boolean check_organelles);
extern void FindShortIntrons (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void CheckBioSourceQuals (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void FindExtendablePartials (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
extern void FindBacterialNonExtendablePartials (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
NLM_EXTERN void FindMismatchedComments (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list);
1371 
/* autofix functions */
/* All of these have the AutofixCallback parameter list: the affected items, a data
   pointer, and the log to report into. */
NLM_EXTERN void MarkOverlappingCDSs (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void FixBacterialNonExtendablePartials (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void FixExtendablePartials (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void FixMismatchedComments (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void FixHumanHosts (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void FixOrderedLocations (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void OncallerToolPseudoDiscrepanciesFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void OncallerToolFindEcoNoEnvFix (ValNodePtr item_list, Pointer data, LogInfoPtr lip);
NLM_EXTERN void AddExceptionsToShortIntrons (ValNodePtr item_list, Pointer data, LogInfoPtr lip);

/* predicate on a single feature (the length threshold is in the definition) */
NLM_EXTERN Boolean IsShortrRNA (SeqFeatPtr sfp);
1384 
/* structure shared by tbl2asn and discrepancy report functions */
/* Four ValNode lists gathered while checking gen-prod sets; CheckGenProdSetsInSeqEntry
   receives a pointer to this structure along with the entry to check. */
typedef struct genprodsetdiscrepancylists {
  ValNodePtr cds_product_list;
  ValNodePtr mrna_product_list;
  ValNodePtr missing_mrna_product;
  ValNodePtr missing_protein_id;
} GenProdSetDiscrepancyListsData, PNTR GenProdSetDiscrepancyListsPtr;

extern void CheckGenProdSetsInSeqEntry (SeqEntryPtr sep, GenProdSetDiscrepancyListsPtr lists);
1394 
1395 
1396 typedef struct protidlists {
1397   ValNodePtr missing_gnl_list;
1398   ValNodePtr gnl_list;
1399 } ProtIdListsData, PNTR ProtIdListsPtr;
1400 
1401 /* structure shared by tbl2asn and discrepancy report functions */
1402 typedef struct globaldiscrepancy {
1403   CharPtr str;
1404   Uint1   data_choice;
1405   Pointer data;
1406 } GlobalDiscrepancyData, PNTR GlobalDiscrepancyPtr;
1407 
1408 extern GlobalDiscrepancyPtr GlobalDiscrepancyNew (CharPtr str, Uint1 data_choice, Pointer data);
1409 extern GlobalDiscrepancyPtr GlobalDiscrepancyFree (GlobalDiscrepancyPtr g);
1410 extern ValNodePtr FreeGlobalDiscrepancyList (ValNodePtr vnp);
1411 extern void ConvertGlobalDiscrepancyToText (GlobalDiscrepancyPtr g, Boolean use_feature_fmt, CharPtr filename);
1412 extern void ConvertGlobalDiscrepancyListToText (ValNodePtr vnp, Boolean use_feature_fmt, CharPtr filename);
1413 extern ValNodePtr GetGlobalDiscrepancyItem (GlobalDiscrepancyPtr g);
1414 extern CharPtr GetGlobalDiscrepancyStr (GlobalDiscrepancyPtr g);
1415 NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyString (VoidPtr ptr1, VoidPtr ptr2);
1416 NLM_EXTERN int LIBCALLBACK SortVnpByGlobalDiscrepancyStringCaseSensitive (VoidPtr ptr1, VoidPtr ptr2);
1417 extern ClickableItemPtr
1418 ReportNonUniqueGlobalDiscrepancy
1419 (ValNodePtr vnp,
1420  CharPtr    label_fmt,
1421  CharPtr    ind_cat_fmt,
1422  Uint4      clickable_item_type,
1423  Boolean    keep_top_category);
1424 extern ValNodePtr ReportInconsistentGlobalDiscrepancyPrefixes
1425 (ValNodePtr vnp,
1426  CharPtr    label_fmt,
1427  Uint4      clickable_item_type);
1428 extern ValNodePtr ReportInconsistentGlobalDiscrepancyStrings
1429 (ValNodePtr vnp,
1430  CharPtr    label_fmt,
1431  Uint4      clickable_item_type);
1432 extern ClickableItemPtr ReportMissingFields (ValNodePtr list, CharPtr label_fmt, Uint4 clickable_item_type);
1433 extern ClickableItemPtr ReportBadLocusTagFormat (ValNodePtr list);
1434 extern ClickableItemPtr FindAdjacentDuplicateLocusTagGenes (ValNodePtr locus_tag_list);
1435 extern void FindProteinIDCallback (BioseqPtr bsp, Pointer userdata);
1436 
1437 
1438 /* formats for global discrepancies also used by tbl2asn */
1439 extern CharPtr discReportDuplicateLocusTagFmt;
1440 extern CharPtr discReportOneDuplicateLocusTagFmt;
1441 extern CharPtr discReportDuplicateProteinIDFmt;
1442 extern CharPtr discReportOneDuplicateProteinIDFmt;
1443 extern CharPtr discReportDuplicateTranscriptIdFmt;
1444 extern CharPtr discReportOneDuplicateTranscriptIdFmt;
1445 extern CharPtr discReportInconsistentLocusTagPrefixFmt;
1446 extern CharPtr discReportMissingLocusTags;
1447 extern CharPtr discReportInconsistentProteinIDPrefixFmt;
1448 extern CharPtr discReportBadProteinIdFmt;
1449 extern CharPtr discReportMissingTranscriptIDFmt;
1450 
1451 extern CharPtr GetBioseqLabel (BioseqPtr bsp);
1452 extern CharPtr GetBioseqSetLabel (BioseqSetPtr bssp);
1453 
1454 NLM_EXTERN ValNodePtr ValNodeDupStringList (ValNodePtr vnp);
1455 NLM_EXTERN ValNodePtr ValNodeDupIntList (ValNodePtr vnp);
1456 
/* Kinds of locus-tag error; values are consecutive starting at 0. */
typedef enum {
  eLocusTagErrorBadFormat          = 0,
  eLocusTagErrorDuplicate          = 1,
  eLocusTagErrorInconsistentPrefix = 2
} ELocusTagError;
1462 
1463 NLM_EXTERN ValNodePtr FindBadLocusTagsInList (ValNodePtr list);
1464 
1465 
1466 typedef struct discreportoutputconfig {
1467   Boolean use_feature_table_format;
1468   Boolean expand_report_categories[MAX_DISC_TYPE];
1469   Boolean summary_report;
1470   Boolean add_output_tag;
1471   Boolean add_extra_output_tag;
1472   Int4 num_nucs;
1473 } DiscReportOutputConfigData, PNTR DiscReportOutputConfigPtr;
1474 
1475 NLM_EXTERN void AddToOutputConfig(SeqEntryPtr sep, DiscReportOutputConfigPtr c);
1476 NLM_EXTERN void AddListToOutputConfig(ValNodePtr list, DiscReportOutputConfigPtr c);
1477 
1478 typedef struct globaldiscrepreport {
1479   ValNodeBlock  locus_tag_list;
1480   ValNodeBlock  missing_locus_tag;
1481   ValNodeBlock  cds_product_list;
1482   ValNodeBlock  missing_cds_product;
1483   ValNodeBlock  mrna_product_list;
1484   ValNodeBlock  missing_mrna_product;
1485   ValNodePtr  adjacent_locus_tag_disc_list;
1486   ValNodeBlock  missing_gnl_list;
1487   ValNodeBlock  gnl_list;
1488   ValNodePtr  global_src_qual_vals;
1489   ValNodePtr  global_srcs;
1490   ValNodeBlock  global_prot_name_list;
1491 
1492   ValNodePtr  src_qual_repeated_list;
1493   ValNodePtr  src_qual_multi_list;
1494   ValNodeBlock  feature_count_list;
1495   ValNodeBlock  discrepancy_list;
1496 
1497   PerformDiscrepancyTest    taxlookup;
1498   DiscrepancyConfigPtr      test_config;
1499   DiscReportOutputConfigPtr output_config;
1500 } GlobalDiscrepReportData, PNTR GlobalDiscrepReportPtr;
1501 
1502 NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportNew ();
1503 NLM_EXTERN GlobalDiscrepReportPtr GlobalDiscrepReportFree (GlobalDiscrepReportPtr g);
1504 NLM_EXTERN void AddSeqEntryToGlobalDiscrepReport (SeqEntryPtr sep, GlobalDiscrepReportPtr g, CharPtr filename);
1505 NLM_EXTERN Boolean WriteGlobalDiscrepancyReportEx (GlobalDiscrepReportPtr g, FILE *fp, CharPtr extra_comment);
1506 NLM_EXTERN void WriteGlobalDiscrepancyReport (GlobalDiscrepReportPtr g, FILE *fp);
1507 
1508 NLM_EXTERN Boolean CollectionDateIsInTheFuture (CharPtr name);
1509 NLM_EXTERN Boolean CollectionDateIsValid (CharPtr name);
1510 NLM_EXTERN Boolean CollectionDatesInOrder (CharPtr name);
1511 
/* for the Barcode Discrepancy Test
 * Test identifiers number consecutively from 0; eBarcodeTest_LAST is the
 * count of tests and is used to size per-test arrays in this header. */
typedef enum {
  eBarcodeTest_Length = 0,                  /* 0 */
  eBarcodeTest_Primers,                     /* 1 */
  eBarcodeTest_Country,                     /* 2 */
  eBarcodeTest_SpecimenVoucher,             /* 3 */
  eBarcodeTest_PercentN,                    /* 4 */
  eBarcodeTest_CollectionDate,              /* 5 */
  eBarcodeTest_OrderAssignment,             /* 6 */
  eBarcodeTest_LowTrace,                    /* 7 */
  eBarcodeTest_FrameShift,                  /* 8 */
  eBarcodeTest_StructuredSpecimenVoucher,   /* 9 */
  eBarcodeTest_LAST                         /* 10: number of tests */
} EBarcodeTest;
1526 
1527 typedef struct barcodetestconfig
1528 {
1529   Boolean conf_list[eBarcodeTest_LAST];
1530   Int4    min_length;
1531   FloatLo min_n_percent;
1532   Boolean require_keyword;
1533 } BarcodeTestConfigData, PNTR BarcodeTestConfigPtr;
1534 
1535 extern BarcodeTestConfigPtr BarcodeTestConfigNew();
1536 extern BarcodeTestConfigPtr BarcodeTestConfigFree (BarcodeTestConfigPtr cfg);
1537 
1538 extern CharPtr GetBarcodeTestName (Int4 i);
1539 
1540 extern Int4 GetBarcodeTestNumFromBarcodeTestName (CharPtr test_name);
1541 
1542 typedef struct barcodetestresults
1543 {
1544   Boolean failed_tests[eBarcodeTest_LAST];
1545   BioseqPtr bsp;
1546   FloatLo   n_percent;
1547   Int4      num_trace;
1548 } BarcodeTestResultsData, PNTR BarcodeTestResultsPtr;
1549 
1550 extern BarcodeTestResultsPtr BarcodeTestResultsNew ();
1551 extern BarcodeTestResultsPtr BarcodeTestResultsFree (BarcodeTestResultsPtr res);
1552 extern BarcodeTestResultsPtr BarcodeTestResultsCopy (BarcodeTestResultsPtr res);
1553 extern ValNodePtr            BarcodeTestResultsListFree (ValNodePtr res_list);
1554 extern ValNodePtr            BarcodeTestResultsExtractPass (ValNodePtr PNTR res_list);
1555 
1556 extern Boolean IsBarcodeID (SeqIdPtr sip);
1557 
1558 extern CharPtr BarcodeTestBarcodeIdString (BioseqPtr bsp);
1559 extern CharPtr BarcodeTestGenbankIdString (BioseqPtr bsp);
1560 
1561 /* This one gets discrepancies by category */
1562 extern ValNodePtr GetBarcodeDiscrepancies (ValNodePtr sep_list, BarcodeTestConfigPtr cfg);
1563 extern ValNodePtr GetBarcodePassFail (SeqEntryPtr sep, BarcodeTestConfigPtr cfg);
1564 NLM_EXTERN CharPtr GetBarcodeTestFailureReasons (BarcodeTestResultsPtr res);
1565 /* This one lists passes and failures, with reasons for failures */
1566 extern void WriteBarcodeTestComprehensive (FILE *fp, ValNodePtr results_list);
1567 extern void WriteBarcodeDiscrepancies (FILE *fp, ValNodePtr results_list);
1568 extern void WriteBarcodeFailureReport (FILE *fp, ValNodePtr results_list);
1569 extern void WriteBarcodeTestCompliance (FILE *fp, ValNodePtr results_list);
1570 extern void WriteBarcodeTestComplianceEx (FILE *fp, ValNodePtr results_list, Boolean low_trace_fail);
1571 extern void WriteBarcodeTagTable (FILE *fp, ValNodePtr results_list);
1572 NLM_EXTERN Boolean IsIBOL (BioseqPtr bsp);
1573 
1574 NLM_EXTERN Boolean
1575 BarcodeValidateOneSeqEntry
1576 (FILE *ofp,
1577  SeqEntryPtr sep,
1578  Boolean show_all,
1579  Boolean use_xml,
1580  Boolean show_header,
1581  CharPtr xml_header_text);
1582 extern void RemoveBarcodeTech (FILE *fp, ValNodePtr results_list);
1583 extern void RemoveBarcodeKeywords (FILE *fp, ValNodePtr results_list);
1584 extern void ApplyBarcodeKeywords (FILE *fp, ValNodePtr results_list);
1585 extern void ApplyBarcodeTech (FILE *fp, ValNodePtr results_list);
1586 extern Boolean PassBarcodeTests (BarcodeTestResultsPtr res);
1587 extern Boolean HasBARCODETech (BioseqPtr bsp);
1588 NLM_EXTERN void ApplyBarcodeKeywordToBioseq (BioseqPtr bsp);
1589 NLM_EXTERN Boolean BioseqHasBarcodeKeyword (BioseqPtr bsp);
1590 NLM_EXTERN Boolean BioseqHasKeyword (BioseqPtr bsp, CharPtr keyword);
1591 NLM_EXTERN void RemoveBarcodeKeywordsFromObjectList (FILE *fp, ValNodePtr object_list);
1592 NLM_EXTERN Boolean RemoveBarcodeTechFromBioseq (BioseqPtr bsp);
1593 extern Int4 CountPolymorphismsInBioseq (BioseqPtr bsp);
1594 NLM_EXTERN Boolean RemoveBarcodeKeywordFromBioseq (BioseqPtr bsp);
1595 
1596 
1597 extern CharPtr ExpandDiscrepancyReportTestsFromString (CharPtr list, Boolean expand, DiscReportOutputConfigPtr dcp);
1598 extern void CollateDiscrepancyReports (ValNodePtr PNTR discrepancy_reports);
1599 extern void WriteAsnDiscReport (ValNodePtr discrepancy_list, FILE *ofp, DiscReportOutputConfigPtr oc, Boolean use_flag);
1600 
1601 
1602 /* extern to allow access to subsource_subtype_alist */
1603 typedef struct Nlm_qual_name_assoc {
1604    Nlm_CharPtr name;
1605    Uint1       value;
1606 } Nlm_QualNameAssoc, PNTR Nlm_QualNameAssocPtr, Nlm_QualNameAlist[];
1607 
1608 typedef struct Nlm_name_name_assoc {
1609    Nlm_CharPtr name;
1610    Nlm_CharPtr alias;
1611    Uint1       value;
1612 } Nlm_NameNameAssoc, PNTR Nlm_NameNameAssocPtr, Nlm_NameNameAlist[];
1613 
1614 extern Nlm_QualNameAssoc current_orgmod_subtype_alist[];
1615 extern Nlm_QualNameAssoc discouraged_orgmod_subtype_alist[];
1616 extern Nlm_QualNameAssoc discontinued_orgmod_subtype_alist[];
1617 extern Nlm_NameNameAssoc orgmod_aliases[];
1618 extern CharPtr GetOrgModQualName (Uint1 subtype);
1619 extern void BioSourceHasOldOrgModQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued);
1620 NLM_EXTERN void StringHasOrgModPrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref);
1621 NLM_EXTERN CharPtr StringHasPrefix (CharPtr str, CharPtr pref, Boolean novalneeded, Boolean skippref);
1622 
1623 extern Nlm_QualNameAssoc  current_subsource_subtype_alist [];
1624 extern Nlm_QualNameAssoc  discouraged_subsource_subtype_alist[];
1625 extern Nlm_QualNameAssoc  discontinued_subsource_subtype_alist[];
1626 extern Nlm_NameNameAssoc  subsource_aliases [];
1627 extern CharPtr GetSubsourceQualName (Uint1 subtype);
1628 extern void BioSourceHasOldSubSourceQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued);
1629 extern Boolean GeneRefMatch (GeneRefPtr grp1, GeneRefPtr grp2);
1630 extern Boolean DbxrefsMatch (ValNodePtr vnp1, ValNodePtr vnp2, Boolean case_sensitive);
1631 extern Boolean XrefsMatch (SeqFeatXrefPtr x1, SeqFeatXrefPtr x2);
1632 extern Boolean ProtRefMatch (ProtRefPtr prp1, ProtRefPtr prp2);
1633 
1634 extern void IsCorrectLatLonFormat (CharPtr lat_lon, BoolPtr format_correct, BoolPtr precision_correct, BoolPtr lat_in_range, BoolPtr lon_in_range);
1635 extern CharPtr FixLatLonFormat (CharPtr orig_lat_lon);
1636 extern Boolean ParseLatLon (CharPtr lat_lon, FloatHi PNTR latP, FloatHi PNTR lonP);
1637 extern void ApplyBarcodeDbxrefsToBioseq (BioseqPtr bsp, Pointer data);
1638 extern void ApplyFBOLDbxrefsToBioseq (BioseqPtr bsp, Pointer data);
1639 
1640 extern CharPtr GetCountryFix (CharPtr country, CharPtr PNTR country_list);
1641 
1642 extern CharPtr ncrnaClassList[];
1643 extern Int4 NcrnaOTHER;
1644 extern Boolean IsStringInNcRNAClassList (CharPtr str);
1645 extern Boolean IsStringInRegulatoryClassList (CharPtr str);
1646 extern Boolean IsStringInRecombinationClassList (CharPtr str);
1647 extern ValNodePtr ListFeaturesInLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice);
1648 extern ValNodePtr ListCodingRegionsContainedInSourceFeatures (SeqEntryPtr sep);
1649 extern ValNodePtr ListFeaturesOverlappingLocationEx (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice, ValNodePtr constraint);
1650 extern ValNodePtr ListFeaturesOverlappingLocation (BioseqPtr bsp, SeqLocPtr slp, Uint1 seqfeatChoice, Uint1 featdefChoice);
1651 
1652 extern void ConvertSourceFeatDescProc (SeqFeatPtr sfp, Pointer userdata);
1653 
1654 /* for correcting capitalization */
1655 NLM_EXTERN void
1656 FixCapitalizationInElement
1657 (CharPtr PNTR pEl,
1658  Boolean      bAbbrev,
1659  Boolean      bShortWords,
1660  Boolean      bApostrophes);
1661 
1662 NLM_EXTERN void FixCapitalizationInAuthor (AuthorPtr pAuthor);
1663 NLM_EXTERN void FixCapsInPubAffil (AffilPtr affil);
1664 NLM_EXTERN void FixCapsInPubAffilEx (AffilPtr affil, Boolean punct_only);
1665 NLM_EXTERN void FixCapitalizationInCountryString (CharPtr PNTR pCountry);
1666 NLM_EXTERN void FixCapitalizationInCountryStringEx (CharPtr PNTR pCountry, Boolean punct_only);
1667 NLM_EXTERN void FixStateAbbreviationsInAffil (AffilPtr affil, LogInfoPtr lip);
1668 
1669 NLM_EXTERN void FixAffiliationShortWordsInElement (CharPtr PNTR pEl);
1670 NLM_EXTERN void FixKnownAbbreviationsInElement (CharPtr PNTR pEl);
1671 
1672 NLM_EXTERN void FixAbbreviationsInElement (CharPtr PNTR pEl);
1673 NLM_EXTERN void FixOrgNamesInString (CharPtr str, ValNodePtr org_names);
1674 NLM_EXTERN void ResetCapitalization (Boolean first_is_upper, CharPtr pString);
1675 
1676 NLM_EXTERN SeqIdPtr CreateSeqIdFromText (CharPtr id_str, SeqEntryPtr sep);
1677 NLM_EXTERN SeqLocPtr SeqLocWholeNew (BioseqPtr bsp);
1678 NLM_EXTERN Int4 GetDeltaSeqLen (DeltaSeqPtr dsp);
1679 NLM_EXTERN DeltaSeqPtr GetDeltaSeqForPosition(Int4 pos, BioseqPtr bsp, Int4Ptr pStart);
1680 
1681 typedef SeqAlignPtr (*GlobalAlignFunc) PROTO ((BioseqPtr, BioseqPtr, BoolPtr));
1682 
/* Bit flags, one bit each, so that they can be OR-ed together (see the
 * Uint4 options member of AdjustFeatForGapData). */
typedef enum {
  eAdjustFeatForGap_unknown_gaps       = 1 << 0,
  eAdjustFeatForGap_known_gaps         = 1 << 1,
  eAdjustFeatForGap_make_partial       = 1 << 2,
  eAdjustFeatForGap_partial_for_pseudo = 1 << 3,
  eAdjustFeatForGap_trim_ends          = 1 << 4,
  eAdjustFeatForGap_split_internal     = 1 << 5,
  eAdjustFeatForGap_split_in_intron    = 1 << 6
} EAdjustFeatForGap;
1692 
1693 typedef struct adjustfeatforgap {
1694   ValNodePtr feature_list;
1695   Uint4      options;
1696   GlobalAlignFunc align_func;
1697   ValNodePtr features_in_gap;
1698 } AdjustFeatForGapData, PNTR AdjustFeatForGapPtr;
1699 
1700 NLM_EXTERN AdjustFeatForGapPtr AdjustFeatForGapFree (AdjustFeatForGapPtr agp);
1701 NLM_EXTERN Boolean FeatureOkForFeatureList (SeqFeatPtr sfp, ValNodePtr feature_list);
1702 NLM_EXTERN void
1703 LocationContainsGaps
1704 (SeqLocPtr slp,
1705  BioseqPtr bsp,
1706  Uint4     options,
1707  BoolPtr   terminal_gaps,
1708  BoolPtr   internal_gaps,
1709  BoolPtr   entirely_in_gap);
1710 
1711 NLM_EXTERN void SetPartialsAfterSplittingAtGap (SeqLocPtr before, SeqLocPtr after, Boolean set_partial_ends, Boolean partial5, Boolean partial3);
1712 NLM_EXTERN void AdjustFeatureForGapsCallback (SeqFeatPtr sfp, Pointer data);
1713 NLM_EXTERN void MarkFeaturesInGapsForDeletion (AdjustFeatForGapPtr afgp);
1714 NLM_EXTERN void AdjustCDSLocationsForUnknownGapsCallback (SeqFeatPtr sfp, Pointer data);
1715 NLM_EXTERN Boolean GapInLocation (Int4 seq_offset, Int4 length, SeqLocPtr loc);
1716 NLM_EXTERN BioseqPtr
1717 AddProteinSequenceCopy
1718 (BioseqPtr  protbsp,
1719  BioseqPtr  featbsp,
1720  SeqFeatPtr new_sfp,
1721  Uint2      entityID);
1722 NLM_EXTERN void AdjustFrame (SeqFeatPtr sfp, BioseqPtr oldprot);
1723 NLM_EXTERN void SetProductSequencePartials (BioseqPtr protbsp, Boolean partial5, Boolean partial3);
1724 NLM_EXTERN void AddCDSGapComment (SeqFeatPtr sfp);
1725 
1726 
1727 NLM_EXTERN Boolean SeqEdFixProteinFeatures (BioseqPtr oldbsp, BioseqPtr newbsp, Boolean force_fix, GlobalAlignFunc align_func);
1728 NLM_EXTERN void SeqEdTranslateOneCDS (SeqFeatPtr sfp, BioseqPtr featbsp, Uint2 entityID, GlobalAlignFunc align_func);
1729 NLM_EXTERN void SeqEdRemapLocation (SeqAlignPtr salp, SeqLocPtr slp, Int4 seq_len);
1730 
1731 NLM_EXTERN CharPtr GetStateAbbreviation (CharPtr state);
1732 
1733 typedef SeqAlignPtr (*LocalAlignFunc) PROTO ((BioseqPtr, BioseqPtr));
1734 
1735 extern void ReverseAlignmentStrand (SeqAlignPtr salp, Int4 nth);
1736 
1737 NLM_EXTERN SeqAlignPtr SortPairwiseAlignmentsByFirstSeqRange (SeqAlignPtr salp);
1738 NLM_EXTERN ValNodePtr ReportCoverageForBioseqSeqHist (BioseqPtr bsp);
1739 
1740 NLM_EXTERN void ConvertLocalIdsToBarcodeIds (SeqEntryPtr sep);
1741 
1742 NLM_EXTERN ValNodePtr MakeTokensFromLine (CharPtr line);
1743 
1744 NLM_EXTERN SeqFeatPtr GetGeneForFeature (SeqFeatPtr sfp);
1745 NLM_EXTERN SeqFeatPtr GetmRNAforCDS (SeqFeatPtr cds);
1746 NLM_EXTERN SeqFeatPtr GetCDSformRNA (SeqFeatPtr mrna);
1747 
1748 NLM_EXTERN Boolean IsStringInSpanInList (CharPtr str, CharPtr list);
1749 
1750 NLM_EXTERN void ParseGoTermsFromFields (SeqEntryPtr sep);
1751 
/* for autodef
 * Removable item kinds number consecutively from 0; NumRemovableItems is
 * the count and sizes DeflineFeatureRequestList.keep_items. */
typedef enum {
  RemovableExon = 0,               /*  0 */
  RemovableIntron,                 /*  1 */
  Removable5UTR,                   /*  2 */
  Removable3UTR,                   /*  3 */
  RemovableuORF,                   /*  4 */
  RemovableCDS,                    /*  5 */
  RemovablePromoter,               /*  6 */
  RemovableLTR,                    /*  7 */
  RemovableNoncodingProductFeat,   /*  8 */
  RemovableMobileElement,          /*  9 */
  RemovablePrecursorRNA,           /* 10 */
  RemovablencRNA,                  /* 11 */
  RemovableRepeatRegion,           /* 12 */
  NumRemovableItems                /* 13: number of kinds */
} RemovableList;
1769 NLM_EXTERN CharPtr GetRemovableItemName (Int4 i);
1770 
/* Definition-line styles.  Values start at 1, not 0. */
typedef enum {
  DEFLINE_USE_FEATURES      = 1,
  DEFLINE_COMPLETE_SEQUENCE = 2,
  DEFLINE_PARTIAL_SEQUENCE  = 3,
  DEFLINE_COMPLETE_GENOME   = 4,
  DEFLINE_PARTIAL_GENOME    = 5,
  DEFLINE_SEQUENCE          = 6
} DefLineType;
1779 
1780 typedef struct deflinefeaturerequestlist {
1781   Boolean      keep_items[NumRemovableItems];
1782   Boolean      add_fake_promoters;
1783   Boolean      suppress_alt_splice_phrase;
1784   Boolean      remove_subfeatures;
1785   DefLineType  feature_list_type;
1786   Int4         misc_feat_parse_rule;
1787   Boolean      suppress_locus_tags;
1788   ValNodePtr   suppressed_feature_list;
1789   Boolean      use_ncrna_note;
1790   Boolean      suppress_allele;
1791 } DeflineFeatureRequestList, PNTR DeflineFeatureRequestListPtr;
1792 
1793 NLM_EXTERN void InitFeatureRequests (DeflineFeatureRequestListPtr feature_requests);
1794 
1795 
1796 /* ModifierItemLocalData is used to store information about the results of
1797  * a search of the set of organisms in a record and the results of user
1798  * input to a dialog for deciding which modifiers should be used in the
1799  * organism description.
1800  */
1801 typedef struct modifieritemlocal {
1802 /*  ButtoN        button; */
1803   Boolean       any_present;
1804   Boolean       all_present;
1805   Boolean       is_unique;
1806   CharPtr       first_value_seen;
1807   ValNodePtr    values_seen;
1808   Boolean       all_unique;
1809   CharPtr       status;
1810   Boolean       required;
1811 } ModifierItemLocalData, PNTR ModifierItemLocalPtr;
1812 
/* Positions of the definition-line modifiers, numbered from 0 in the order
 * listed (alphabetical by modifier name).
 * NOTE(review): presumably these index DefLineModifiers[] -- confirm that
 * the table order matches. */
typedef enum {
  DEFLINE_POS_Bio_material = 0,        /*  0 */
  DEFLINE_POS_Biotype,                 /*  1 */
  DEFLINE_POS_Biovar,                  /*  2 */
  DEFLINE_POS_Breed,                   /*  3 */
  DEFLINE_POS_Cell_line,               /*  4 */
  DEFLINE_POS_Chemovar,                /*  5 */
  DEFLINE_POS_Chromosome,              /*  6 */
  DEFLINE_POS_Clone,                   /*  7 */
  DEFLINE_POS_Country,                 /*  8 */
  DEFLINE_POS_Cultivar,                /*  9 */
  DEFLINE_POS_Culture_collection,      /* 10 */
  DEFLINE_POS_Dev_stage,               /* 11 */
  DEFLINE_POS_Ecotype,                 /* 12 */
  DEFLINE_POS_Endogenous_virus_name,   /* 13 */
  DEFLINE_POS_Genotype,                /* 14 */
  DEFLINE_POS_Haplogroup,              /* 15 */
  DEFLINE_POS_Haplotype,               /* 16 */
  DEFLINE_POS_Isolate,                 /* 17 */
  DEFLINE_POS_Linkage_group,           /* 18 */
  DEFLINE_POS_Map,                     /* 19 */
  DEFLINE_POS_Pathovar,                /* 20 */
  DEFLINE_POS_Plasmid_name,            /* 21 */
  DEFLINE_POS_Pop_variant,             /* 22 */
  DEFLINE_POS_Segment,                 /* 23 */
  DEFLINE_POS_Serogroup,               /* 24 */
  DEFLINE_POS_Serotype,                /* 25 */
  DEFLINE_POS_Serovar,                 /* 26 */
  DEFLINE_POS_Specimen_voucher,        /* 27 */
  DEFLINE_POS_Strain,                  /* 28 */
  DEFLINE_POS_Subclone,                /* 29 */
  DEFLINE_POS_Substrain,               /* 30 */
  DEFLINE_POS_Transgenic               /* 31 */
} DefLinePos;
1847 
1848 NLM_EXTERN Int4 GetDeflinePosForFieldName(CharPtr name);
1849 NLM_EXTERN Int4 GetDeflinePosForFieldType (ValNodePtr field);
1850 
1851 /* ModifierItemGlobalData is used to store information about the available
1852  * modifiers - the name to use when displaying a list of checkboxes, whether
1853  * the modifier is an Organism modifier or a Source modifier, the subtype
1854  * to use when looking for the modifier in the organism qualifier list,
1855  * and whether this modifier is required by default.
1856  */
1857 typedef struct modifieritemglobal {
1858   CharPtr       name;
1859   Boolean       isOrgMod;
1860   Uint1         subtype;
1861 } ModifierItemGlobalData, PNTR ModifierItemGlobalPtr;
1862 
1863 extern ModifierItemGlobalData DefLineModifiers[];
1864 NLM_EXTERN size_t NumDefLineModifiers (void);
1865 
1866 /* OrganismDescriptionModifiers is used to apply specific user preferences
1867  * for how to construct the organism descriptions - whether or not to use
1868  * labels for the modifiers, whether and how to limit the number of modifiers
1869  * used in any one organism description, whether to keep or remove modifier
1870  * information in parentheses in the organism taxonomy name, and whether or
1871  * not to apply modifiers to organisms with "sp." in the middle of the
1872  * taxonomy name.
1873  */
1874 typedef struct organismdescriptionmodifiers {
1875   Boolean  use_labels;
1876   Int2     max_mods;
1877   Boolean  keep_paren;
1878   Boolean  exclude_sp;
1879   Boolean  exclude_cf;
1880   Boolean  exclude_aff;
1881   Boolean  exclude_nr;
1882   Boolean  include_country_extra;
1883   Int4     clone_isolate_HIV_rule_num;
1884   Boolean  use_modifiers;
1885   Boolean  allow_semicolon_in_modifier;
1886   Boolean  allow_mod_at_end_of_taxname;
1887 } OrganismDescriptionModifiers, PNTR OrganismDescriptionModifiersPtr;
1888 
1889 NLM_EXTERN Boolean ShouldExcludeSp (SeqEntryPtr sep);
1890 NLM_EXTERN void InitOrganismDescriptionModifiers(OrganismDescriptionModifiersPtr odmp, SeqEntryPtr sep);
1891 
/* These values are used for the clone_isolate_HIV_rule_num value in
 * OrganismDescriptionModifiers.  Numbering starts at 1. */
typedef enum {
  clone_isolate_HIV_rule_prefer_clone   = 1,
  clone_isolate_HIV_rule_prefer_isolate = 2,
  clone_isolate_HIV_rule_want_both      = 3
} clone_isolate_HIV_rule_values;
1898 
1899 
1900 typedef struct sourcequaldesc
1901 {
1902   CharPtr       name;
1903   Boolean       isOrgMod;
1904   Uint1         subtype;
1905   Uint1         subfield;
1906 } SourceQualDescData, PNTR SourceQualDescPtr;
1907 
1908 NLM_EXTERN int LIBCALLBACK SortVnpBySourceQualDesc (VoidPtr ptr1, VoidPtr ptr2);
1909 
1910 NLM_EXTERN void SetRequiredModifiers (ModifierItemLocalPtr modList);
1911 NLM_EXTERN void CountModifiers (ModifierItemLocalPtr ItemList, SeqEntryPtr sep);
1912 NLM_EXTERN ValNodePtr FindBestModifiersEx(SeqEntryPtr sep, ModifierItemLocalPtr ItemList, Boolean use_new);
1913 NLM_EXTERN ValNodePtr FindBestModifiers(SeqEntryPtr sep, ModifierItemLocalPtr ItemList);
1914 NLM_EXTERN ValNodePtr FindBestModifiersForDeflineClauseList (ValNodePtr defline_clauses, ModifierItemLocalPtr ItemList);
1915 
1916 NLM_EXTERN ValNodePtr GetModifierIndicesFromModList (ModifierItemLocalPtr modList);
1917 extern void TestFindBestQualCombo (FILE *fp);
1918 
1919 
1920 NLM_EXTERN CharPtr MergeValNodeStrings (ValNodePtr list, Boolean useReturn);
1921 
1922 NLM_EXTERN ValNodePtr FindExactStringListMatch (ValNodePtr list, CharPtr value);
1923 
1924 NLM_EXTERN void BuildDefLineFeatClauseList
1925 ( SeqEntryPtr sep,
1926   Uint2 entityID,
1927   DeflineFeatureRequestList PNTR feature_requests,
1928   Int2 product_flag,
1929   Boolean alternate_splice_flag,
1930   Boolean gene_cluster_opp_strand,
1931   ValNodePtr PNTR list);
1932 
1933 NLM_EXTERN Boolean AreFeatureClausesUnique (ValNodePtr list);
1934 NLM_EXTERN void DefLineFeatClauseListFree (ValNodePtr vnp);
1935 
1936 NLM_EXTERN void
1937 BuildDefinitionLinesFromFeatureClauseLists
1938 (ValNodePtr list,
1939  ModifierItemLocalPtr modList,
1940  ValNodePtr modifier_indices,
1941  OrganismDescriptionModifiersPtr odmp);
1942 
1943 NLM_EXTERN void
1944 BuildDefLinesFromFeatClauseListsForOneBsp
1945 (ValNodePtr list,
1946  ModifierItemLocalPtr modList,
1947  ValNodePtr modifier_indices,
1948  OrganismDescriptionModifiersPtr odmp,
1949  BioseqPtr bsp);
1950 
1951 NLM_EXTERN void
1952 AutoDefForSeqEntry
1953 (SeqEntryPtr sep,
1954  Uint2 entityID,
1955  OrganismDescriptionModifiersPtr odmp,
1956  ModifierItemLocalPtr modList,
1957  ValNodePtr modifier_indices,
1958  DeflineFeatureRequestListPtr feature_requests,
1959  Int2 product_flag,
1960  Boolean alternate_splice_flag,
1961  Boolean gene_cluster_opp_strand);
1962 
1963 NLM_EXTERN void
1964 AutoDefForSeqEntryEx
1965 (SeqEntryPtr sep,
1966 Uint2 entityID,
1967 OrganismDescriptionModifiersPtr odmp,
1968 ModifierItemLocalPtr modList,
1969 ValNodePtr modifier_indices,
1970 DeflineFeatureRequestListPtr feature_requests,
1971 Int2 product_flag,
1972 Boolean alternate_splice_flag,
1973 Boolean gene_cluster_opp_strand,
1974 Boolean update_options);
1975 
1976 NLM_EXTERN void RegenerateAutoDef(BioseqPtr bsp);
1977 NLM_EXTERN void RemoveAutodefObjects(SeqEntryPtr sep);
1978 NLM_EXTERN void RemoveAutodefObjectsForDesc(SeqDescPtr sdp);
1979 
1980 NLM_EXTERN void AddPopsetTitles
1981 (SeqEntryPtr sep,
1982  DeflineFeatureRequestListPtr feature_requests,
1983  Int2 product_flag,
1984  Boolean alternate_splice_flag,
1985  Boolean gene_cluster_opp_strand);
1986 
1987 NLM_EXTERN void RemovePopsetTitles(SeqEntryPtr sep);
1988 
1989 NLM_EXTERN UserObjectPtr MakeAutoDefOptionsUserObject
1990 (OrganismDescriptionModifiersPtr odmp,
1991 ModifierItemLocalPtr modList,
1992 ValNodePtr modifier_indices,
1993 DeflineFeatureRequestListPtr feature_requests,
1994 Int2 product_flag,
1995 Boolean alternate_splice_flag,
1996 Boolean gene_cluster_opp_strand);
1997 
1998 NLM_EXTERN void AddAutoDefUserObjectToSeqEntry(SeqEntryPtr sep, UserObjectPtr uop);
1999 
2000 NLM_EXTERN void DoTbl2AsnAutoDef(SeqEntryPtr sep, Uint2 entityID);
2001 
2002 typedef struct popsetretrostat {
2003   Int4 feature_clause;
2004   Int4 common_title;
2005   Int4 uncalculatable;
2006   Boolean title_added;
2007 } PopSetRetroStatData, PNTR PopSetRetroStatPtr;
2008 
2009 NLM_EXTERN void PopSetAutoDefRetro (SeqEntryPtr sep, PopSetRetroStatPtr stat);
2010 
2011 NLM_EXTERN Boolean IsSpName (CharPtr taxName);
2012 
2013 #define DEFAULT_ORGANELLE_CLAUSE 10
2014 NLM_EXTERN BioSourcePtr GetBiopForBsp (BioseqPtr bsp);
2015 NLM_EXTERN Boolean IsLocAInBonSameStrand (SeqLocPtr slp1, SeqLocPtr slp2);
2016 NLM_EXTERN void CleanUpTaxName (CharPtr taxName, Boolean keep_in_paren);
2017 NLM_EXTERN Boolean UseOrgModifier (OrgModPtr mod, CharPtr taxName, Boolean allow_at_end);
2018 NLM_EXTERN Boolean UseSubSrcModifier (SubSourcePtr ssp, CharPtr taxName, Boolean allow_at_end);
2019 NLM_EXTERN void AddModifierLabel
2020 ( Boolean use_labels,
2021   Boolean is_orgmod,
2022   Uint1   subtype,
2023   CharPtr modifier_text);
2024 NLM_EXTERN Boolean LIBCALLBACK IsMobileElement (SeqFeatPtr sfp);
2025 NLM_EXTERN void RemoveNucProtSetTitles (SeqEntryPtr sep);
2026 NLM_EXTERN void RemoveMRnaTitles (SeqEntryPtr sep);
2027 NLM_EXTERN void RemoveProteinTitles (SeqEntryPtr sep);
2028 NLM_EXTERN void SetAutoDefIDModifiers (ModifierItemLocalPtr modList);
2029 
2030 
2031 NLM_EXTERN ValNodePtr ReadTabTableFromFile (FILE *fp);
2032 NLM_EXTERN ValNodePtr FlipTabTableAxes (ValNodePtr row_list);
2033 NLM_EXTERN ValNodePtr FreeTabTable (ValNodePtr row_list);
2034 NLM_EXTERN ValNodePtr CopyTabTable (ValNodePtr row_list);
2035 NLM_EXTERN void WriteTabTableToFile (ValNodePtr table, FILE *fp);
2036 NLM_EXTERN ValNodePtr CountTabTableBlanks (ValNodePtr row_list);
2037 NLM_EXTERN ValNodePtr ScanTabTableForSpecialCharacters (ValNodePtr row_list);
2038 NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInText (CharPtr PNTR text);
2039 NLM_EXTERN void AutoReplaceSpecialCharactersWithMessage (CharPtr PNTR text);
2040 NLM_EXTERN ValNodePtr AutoReplaceSpecialCharactersInTabTable (ValNodePtr row_list);
2041 NLM_EXTERN void AutoFixSpecialCharactersInEntity (Uint2 entityID);
2042 
2043 NLM_EXTERN void RemoveQuotesFromTabTable (ValNodePtr row_list);
2044 NLM_EXTERN void ReparseTabTableConvertFirstSpaceToTab (ValNodePtr row_list);
2045 NLM_EXTERN void ReparseTabTableConvertMultiSpaceToTab (ValNodePtr row_list);
2046 NLM_EXTERN void CombineTabTableColumns (ValNodePtr row_list, ValNodePtr column_pos, CharPtr delimiter);
2047 NLM_EXTERN void ReparseTabTableSeparateColumnAtDelimiter (ValNodePtr row_list, Char delimiter, Int4 col, Boolean stop_after_first);
2048 NLM_EXTERN void AddTextToTabTableColumn (ValNodePtr row_list, Int4 col, CharPtr text, Uint2 existing_text);
2049 NLM_EXTERN ValNodePtr ReadOneColumnList (CharPtr line);
2050 NLM_EXTERN ValNodePtr SortTableRowByAnyColumn (ValNodePtr table, Int4 column);
2051 NLM_EXTERN void AdjustInfluenzaSourceTable (ValNodePtr table);
2052 
2053 NLM_EXTERN void SpecialCharFindWithContext (CharPtr PNTR strp, Pointer userdata, BoolPtr did_find, BoolPtr did_change);
2054 NLM_EXTERN ValNodePtr FreeContextList (ValNodePtr context_list);
2055 
2056 typedef struct twostringhash {
2057   CharPtr PNTR table;
2058   Int4 num_lines;
2059 } TwoStringHashData, PNTR TwoStringHashPtr;
2060 
2061 NLM_EXTERN TwoStringHashPtr TwoStringHashFree (TwoStringHashPtr tsh);
2062 NLM_EXTERN TwoStringHashPtr MakeTwoStringHashFromTabTable (ValNodePtr line_list, Int4 column1, Int4 column2);
2063 NLM_EXTERN CharPtr GetValueFromTwoStringHash (CharPtr key, TwoStringHashPtr tsh);
2064 
2065 NLM_EXTERN Int4 ExtendSeqLocToEnd (SeqLocPtr slp, BioseqPtr bsp, Boolean end5);
2066 
2067 NLM_EXTERN void PromoteAllToBestID (SeqEntryPtr sep);
2068 NLM_EXTERN void PromoteAllToWorstID (SeqEntryPtr sep);
2069 NLM_EXTERN void RemoveAllVersionLocusGIFromID (SeqEntryPtr sep);
2070 
2071 /* functions for converting features */
2072 NLM_EXTERN Boolean IsBioseqSetInGPS (BioseqSetPtr bssp);
2073 NLM_EXTERN Boolean IsBioseqInGPS (BioseqPtr bsp);
2074 NLM_EXTERN Boolean IsFeatInGPS (SeqFeatPtr sfp);
2075 NLM_EXTERN void
2076 ApplyCDSOptionsToFeature
2077 (SeqFeatPtr sfp,
2078  Boolean remove_mRNA,
2079  Boolean remove_gene,
2080  Boolean remove_transcript_id,
2081  Boolean keep_original);
2082 
2083 NLM_EXTERN Boolean
2084 ConvertCDSToRNA
2085 (SeqFeatPtr  sfp,
2086  Uint2       rna_type);
2087 
2088 NLM_EXTERN Boolean ConvertGeneToRNA (SeqFeatPtr sfp, Uint2 featdef_to);
2089 NLM_EXTERN Boolean ConvertBioSrcToRepeatRegion (SeqFeatPtr sfp, Uint2 featdef_to);
2090 NLM_EXTERN CharPtr SubSourceText (BioSourcePtr biop, Uint1 subtype, BoolPtr found);
2091 NLM_EXTERN CharPtr OrgModText (BioSourcePtr biop, Uint1 subtype, BoolPtr found);
2092 NLM_EXTERN CharPtr NoteText (BioSourcePtr biop, CharPtr comment);
2093 NLM_EXTERN Boolean ConvertNonPseudoCDSToMiscFeat (SeqFeatPtr sfp, Boolean viral);
2094 
/* Lookup/construction helpers keyed by a featdef value. RnaRefFromLabel
 * also writes a flag through add_label_to_comment. */
2095 NLM_EXTERN CharPtr GetImportFeatureName (Uint2 featdef_key);
2096 NLM_EXTERN RnaRefPtr RnaRefFromLabel (Uint2 featdef_to, CharPtr label, BoolPtr add_label_to_comment);
2097
/* Location mapping helpers used by nucleotide <-> protein feature conversion.
 * NOTE(review): ownership of the returned SeqLocPtr is not visible here. */
2098 NLM_EXTERN SeqLocPtr GetProteinLocationForNucleotideFeatureConversion (SeqLocPtr nuc_slp, BoolPtr no_cds);
2099 NLM_EXTERN SeqLocPtr FindNucleotideLocationForProteinFeatureConversion (SeqLocPtr slp);
2100 NLM_EXTERN SeqLocPtr BuildProtLoc (SeqFeatPtr overlapping_cds, SeqLocPtr slp, Int4Ptr frame);
/* Convert*Func family: each takes the feature to convert and the target
 * featdef, and returns Boolean.
 * NOTE(review): the result presumably means "conversion succeeded" - confirm. */
2101 NLM_EXTERN Boolean ConvertImpToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2102 NLM_EXTERN Boolean ConvertProtToImpFunc (SeqFeatPtr  sfp, Uint2 featdef_to);
2103 NLM_EXTERN Boolean ConvertRegionToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2104 NLM_EXTERN Boolean ConvertRegionToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2105 NLM_EXTERN Boolean ConvertImpToImpFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2106 NLM_EXTERN Boolean ConvertRegionToRNAFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2107 NLM_EXTERN Boolean ConvertGeneToImpFeatFunc (SeqFeatPtr sfp, Uint2 featdef_to);
2108 NLM_EXTERN Boolean ConvertProtToProtFunc (SeqFeatPtr sfp, Uint2 featdef_to);
/* Conversions with a fixed target type; they take only the feature. */
2109 NLM_EXTERN Boolean ConvertMiscFeatToGene (SeqFeatPtr sfp);
2110 NLM_EXTERN Boolean ConvertMiscFeatToCodingRegion (SeqFeatPtr sfp);
2111 NLM_EXTERN Boolean ConvertmRNAToCodingRegion (SeqFeatPtr sfp);
2112 NLM_EXTERN Boolean ConverttRNAToGene(SeqFeatPtr sfp);
2113 NLM_EXTERN void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep);
2114 NLM_EXTERN SeqFeatPtr GetProtFeature (BioseqPtr protbsp);
2115 
/* Operates on a whole SeqEntry; returns nothing. */
2116 NLM_EXTERN void InstantiateMatPeptideProducts (SeqEntryPtr sep);
2117
/* Predicate on a single feature. */
2118 NLM_EXTERN Boolean CodingRegionHasTranslExcept (SeqFeatPtr sfp);
2119
/* Builds a SeqEntry from a sequence string, a Seq-id and a molecule type.
 * NOTE(review): whether sip is copied or adopted is not visible here. */
2120 NLM_EXTERN SeqEntryPtr SequenceStringToSeqEntry (CharPtr str, SeqIdPtr sip, Uint1 mol_type);
2121
/* RevCompFeats has the (sep, mydata, index, indent) parameter list.
 * NOTE(review): this looks like a SeqEntry-explore style callback - confirm. */
2122 NLM_EXTERN void RevCompOneFeatForBioseq (SeqFeatPtr sfp, BioseqPtr bsp);
2123 NLM_EXTERN void RevCompFeats (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
2124 
2125 /* for parsing collection dates */
/* ReformatDateStringEx takes the original date, a month_first flag, and an
 * output flag month_ambiguous. The *FromToken helpers take a token pointer
 * together with an explicit length (token_len).
 * NOTE(review): ownership of returned CharPtr values is not visible here. */
2126 NLM_EXTERN CharPtr ReformatDateStringEx (CharPtr orig_date, Boolean month_first, BoolPtr month_ambiguous);
2127 NLM_EXTERN CharPtr ReformatDateWithMonthNames (CharPtr orig_date);
2128 NLM_EXTERN Int4 GetYearFromToken (CharPtr token, Int4 token_len);
2129 NLM_EXTERN Int4 ReadNumberFromToken (CharPtr token, Int4 token_len);
2130 NLM_EXTERN CharPtr GetMonthFromToken (CharPtr token, Int4 token_len);
2131 NLM_EXTERN Int4 GetMonthNumFromAbbrev (CharPtr month_abbrev);
/* NOTE(review): whether n is 0- or 1-based in the two functions below is not
 * visible here - confirm before use. */
2132 NLM_EXTERN CharPtr GetMonthAbbrev (Int4 n);
2133 NLM_EXTERN Int4 GetDaysInMonth (Int4 n);
2134
2135 /* for reformatting assembly date */
/* ReformatAssemblyDate receives a pointer to the string pointer, so it is
 * able to replace the caller's string. */
2136 NLM_EXTERN CharPtr AssemblyDateFromCollectionDate (CharPtr collection_date, Boolean ambiguous);
2137 NLM_EXTERN Boolean ReformatAssemblyDate (CharPtr PNTR orig_date);
2138 
/* Structured comment creation and export. The table variant takes header
 * and line lists and appends to *err_list; the file variant reads from fp
 * and returns a ValNode list.
 * NOTE(review): contents of the returned lists are not visible here. */
2139 NLM_EXTERN void CreateStructuredCommentsForAllFromTable (SeqEntryPtr sep, ValNodePtr header, ValNodePtr line, ValNodePtr PNTR err_list);
2140 NLM_EXTERN ValNodePtr CreateStructuredCommentsFromFile (FILE *fp, SeqEntryPtr sep, Boolean apply_to_all);
2141 NLM_EXTERN void AddDatabaseNameToStructuredComment (UserObjectPtr uop, CharPtr dbname);
2142 NLM_EXTERN ValNodePtr CreateStructuredCommentTableFromSeqEntry (SeqEntryPtr sep);
2143 
/* Negative sentinel values.
 * NOTE(review): presumably special results from alignment coordinate/row
 * lookups; the expansions are unparenthesized and may mirror definitions in
 * another header - confirm before changing them. */
2144 #define ALNMGR_GAP           -2
2145 #define ALNMGR_ROW_UNDEFINED -1
2146
/* Takes an alignment, a row, a start/stop range and a target row, two Uint1
 * buffers (seqbuf, alnbuf), and a pointer to the alignment buffer length.
 * NOTE(review): required buffer sizes and whether *alnbuffer_len is input,
 * output or both are not visible here - confirm in the implementation. */
2147 NLM_EXTERN void
2148 AlignmentIntervalToString
2149 (SeqAlignPtr salp,
2150  Int4        row,
2151  Int4        start,
2152  Int4        stop,
2153  Int4        target_row,
2154  Boolean     view_whole_entity,
2155  Uint1Ptr    seqbuf,
2156  Uint1Ptr    alnbuf,
2157  Int4 PNTR   alnbuffer_len,
2158  Boolean     show_substitutions);
2159 
/* Reports counts for a Bioseq through four Int4 output pointers (total N,
 * total dash, total tilde, max stretch), controlled by two Boolean flags.
 * NOTE(review): declared with plain extern rather than NLM_EXTERN, unlike
 * the neighbouring prototypes - confirm this is intentional. */
2160 extern void CountNsInSequence (
2161   BioseqPtr bsp,
2162   Int4Ptr p_totalN,
2163   Int4Ptr p_totalDash,
2164   Int4Ptr p_totalTilde,
2165   Int4Ptr p_max_stretch,
2166   Boolean expand_gaps,
2167   Boolean no_stretch_in_assembly_gap
2168 );
/* Predicate on a Bioseq. */
2169 NLM_EXTERN Boolean IsTSA (BioseqPtr bsp);
2170
/* Predicate on a feature. */
2171 NLM_EXTERN Boolean IsPseudo (SeqFeatPtr sfp);
2172 
/* Coding-region maintenance helpers.
 * NOTE(review): Boolean results presumably indicate that a change was made
 * or that the call succeeded - confirm per function. */
2173 NLM_EXTERN Boolean ExtendPartialsToEndOrGap (SeqFeatPtr sfp);
/* Takes the CDS feature, its entityID, and two flags about stop codons. */
2174 NLM_EXTERN Boolean RetranslateOneCDS
2175 ( SeqFeatPtr sfp,
2176   Uint2 entityID,
2177   Boolean include_stop,
2178   Boolean no_stop_at_end_of_complete_cds);
2179 NLM_EXTERN SeqFeatPtr FindBestProtein (Uint2 entityID, SeqLocPtr product);
2180 NLM_EXTERN void AddNonExtendableException (SeqFeatPtr sfp);
2181 NLM_EXTERN SeqLocPtr GetmRNALocationFromCDSLocation (SeqLocPtr slp, Uint2 entityID);
2182 NLM_EXTERN SeqFeatPtr AddmRNAForCDS (SeqFeatPtr sfp);
2183 NLM_EXTERN Boolean ProductsMatchForRefSeq (CharPtr cds_str, CharPtr mrna_str);
2184 NLM_EXTERN SeqSubmitPtr FindSeqSubmitForSeqEntry (SeqEntryPtr sep);
2185 NLM_EXTERN Boolean CreateMatPeptideFromCDS (SeqFeatPtr sfp);
2186 NLM_EXTERN Boolean ConvertCDSToMatPeptideForOverlappingCDS (SeqFeatPtr sfp, SeqFeatPtr top_cds, Boolean remove_original);
2187 NLM_EXTERN Boolean AutoConvertCDSToMiscFeat (SeqFeatPtr cds, Boolean remove_original);
2188 
/* GetAuthListForPub returns a pointer to an AuthListPtr (AuthListPtr PNTR),
 * not the AuthListPtr itself. */
2189 NLM_EXTERN AuthListPtr PNTR GetAuthListForPub (PubPtr the_pub);
2190 NLM_EXTERN void RemoveConsortiumFromPub (PubPtr pub);
2191
/* Partial-extension helpers on a Seq-int or Seq-loc within a Bioseq.
 * NOTE(review): the Int4 results presumably give the amount of extension. */
2192 NLM_EXTERN Int4 Extend5PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only);
2193 NLM_EXTERN Int4 Extend3PartialSeqIntToEndOrGap (SeqIntPtr sint, BioseqPtr bsp, Boolean short_only);
2194 NLM_EXTERN Int4 ExtendSeqLocToEndOrGap (SeqLocPtr slp, BioseqPtr bsp, Boolean end5);
/* Return a FloatLo for a whole Bioseq or for the start..stop interval.
 * NOTE(review): scale (0-1 vs 0-100) and whether stop is inclusive are not
 * visible here - confirm. */
2195 NLM_EXTERN FloatLo PercentNInBioseq (BioseqPtr bsp, Boolean include_gaps);
2196 NLM_EXTERN FloatLo PercentNInBioseqInterval (BioseqPtr bsp, Int4 start, Int4 stop, Boolean include_gaps);
2197 
/* Set segregation and descriptor/annotation merging helpers. The AddNewUnique*
 * routines take the destination as a pointer-to-pointer (new_set) and a
 * source chain (parent_set). */
2198 NLM_EXTERN SeqEntryPtr GetBestSeqEntryForItem (ValNodePtr vnp);
2199 NLM_EXTERN void AddNewUniqueDescriptors (SeqDescrPtr PNTR new_set, SeqDescrPtr parent_set);
2200 NLM_EXTERN void AddNewUniqueAnnotations (SeqAnnotPtr PNTR new_set, SeqAnnotPtr parent_set);
2201 NLM_EXTERN BioseqSetPtr MakeGroupsForUniqueValues (BioseqSetPtr bssp, ValNodePtr value_lists);
/* Each Prepare* function returns a ValNode list and has a Segregate*
 * counterpart taking the same count argument. */
2202 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberOfSets (Int4 num_sets, SeqEntryPtr sep);
2203 NLM_EXTERN void SegregateSetsByNumber (SeqEntryPtr sep, Int4 num_sets);
2204 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByNumberPerSet (Int4 num_per_set, SeqEntryPtr sep);
2205 NLM_EXTERN void SegregateSetsByNumberPerSet (SeqEntryPtr sep, Int4 num_per_set);
2206
2207 NLM_EXTERN void MoveSequencesFromSetToWrapper (ValNodePtr list, Uint2 entityID);
2208
/* Row-based counterpart of the structured comment table import; errors are
 * appended through err_list. */
2209 NLM_EXTERN ValNodePtr CreateStructuredCommentsFromRow (ValNodePtr header, ValNodePtr values, CharPtr id_str, ValNodePtr PNTR err_list);
2210
2211 NLM_EXTERN void MergeAdjacentAnnotsInList (SeqAnnotPtr sap);
2212
/* GetsDocsumTitle is a predicate on a Bioseq-set class value. */
2213 NLM_EXTERN Boolean GetsDocsumTitle(Uint1 set_class);
2214 NLM_EXTERN void PromoteCommonTitlesToSet (SeqEntryPtr sep);
2215
2216 NLM_EXTERN void SetDescriptorPropagate (BioseqSetPtr bssp);
2217 
/* Callback type: receives a descriptor and an opaque Pointer, returns Boolean.
 * PropagateSomeDescriptors takes such a callback plus the extradata pointer.
 * NOTE(review): presumably extradata is what the callback receives as its
 * second argument, and TRUE selects the descriptor - confirm. */
2218 typedef Boolean (*DescriptorTestFunc) PROTO ((SeqDescPtr, Pointer));
2219 NLM_EXTERN void PropagateSomeDescriptors (SeqEntryPtr sep, DescriptorTestFunc test_func, Pointer extradata);
2220 NLM_EXTERN void PropagateDblinkDescriptors (SeqEntryPtr sep);
2221
/* Two variants keyed by entityID.
 * NOTE(review): the NoUpdate variant presumably skips the update/notification
 * step performed by the first - confirm in the implementation. */
2222 NLM_EXTERN Boolean RemoveDuplicateNestedSetsForEntityID (Uint2 entityID);
2223 NLM_EXTERN Boolean RemoveDuplicateNestedSetsForEntityIDNoUpdate (Uint2 entityID);
2224 
/* Structured comment keyword helpers: add/remove by entityID, mapping
 * functions between prefix, keyword and user object, and Has* predicates on
 * a Bioseq.
 * NOTE(review): ownership of returned CharPtr/ValNodePtr values is not
 * visible here - confirm before freeing. */
2225 NLM_EXTERN void AddStructuredCommentKeywords (Uint2 entityID);
2226 NLM_EXTERN CharPtr KeywordForStructuredCommentPrefix (CharPtr prefix);
2227 NLM_EXTERN CharPtr StructuredCommentPrefixForKeyword (CharPtr keyword);
2228 NLM_EXTERN CharPtr KeywordForStructuredCommentName (UserObjectPtr uop);
2229 NLM_EXTERN Boolean HasKeywordForStructuredCommentName (BioseqPtr bsp, UserObjectPtr uop);
2230 NLM_EXTERN Boolean HasAllKeywordsForStructuredComment (BioseqPtr bsp, CharPtr keyword);
2231 NLM_EXTERN Boolean HasAnyKeywordForStructuredComment (BioseqPtr bsp, CharPtr keyword);
2232 NLM_EXTERN ValNodePtr GetAllStructuredCommentKeywords (void);
2233 NLM_EXTERN void RemoveStructuredCommentKeywords (Uint2 entityID);
2234 NLM_EXTERN void RemoveAllStructuredCommentKeywords (Uint2 entityID);
/* Takes a keyword string and returns a ValNode list. */
2235 NLM_EXTERN ValNodePtr SplitStringAtSemicolon (CharPtr keyword);
2236 
/* Takes an OrgRef and a TextFsa of tags. */
2237 NLM_EXTERN void ParseTaxNameToQuals (OrgRefPtr org, TextFsaPtr tags);
2238
2239 NLM_EXTERN ValNodePtr GetLocusTagPrefixList (SeqEntryPtr sep);
2240
/* Product name checks: a predicate, a reporter that writes to output_file
 * with a prefix, and a fixer that receives a pointer to the string pointer
 * (so it is able to replace the caller's string). */
2241 NLM_EXTERN Boolean IsProductNameOk (CharPtr product_name);
2242 NLM_EXTERN Boolean ReportProductNameProblems (CharPtr product_name, FILE *output_file, CharPtr prefix);
2243 NLM_EXTERN Boolean FixProductNameProblems (CharPtr PNTR product_name);
2244
/* Filtered ASN.1 read/reintegrate pair. ReadFilteredAsn takes an accession
 * list string and writes an entity ID through entityIDptr. */
2245 NLM_EXTERN SeqEntryPtr ReadFilteredAsn (FILE *fp, Boolean is_binary, CharPtr accn_list, Uint2Ptr entityIDptr);
2246 NLM_EXTERN void ReintegrateFilteredAsn (SeqEntryPtr sep, FILE *orig_file, FILE *output, Boolean is_binary);
2247 
/* Record pairing an original descriptor with a replacement, plus the Seq-ids
 * that own it.
 * NOTE(review): field meanings below are read from the field names only;
 * confirm against DescStreamNew and StreamAsnForDescriptors. */
2248 typedef struct descstream {
2249   SeqDescPtr orig;          /* original descriptor */
2250   SeqDescPtr replace;       /* replacement descriptor */
2251   SeqIdPtr   owners;        /* Seq-id chain of owners */
2252   SeqIdPtr   last_owner;    /* presumably tail of the owners chain - confirm */
2253   Boolean    on_all;        /* see SetOnAllValsForDescStreamList */
2254   CharPtr    text;
2255   Int4       num_dependent;
2256 } DescStreamData, PNTR DescStreamPtr;
2257 
/* DescStream lifecycle: constructor from a descriptor and its parent Bioseq,
 * a free function, and a list free function. */
2258 NLM_EXTERN DescStreamPtr DescStreamNew (SeqDescPtr sdp, BioseqPtr parent);
2259 NLM_EXTERN DescStreamPtr DescStreamFree (DescStreamPtr ds);
2260 NLM_EXTERN ValNodePtr DescStreamListFree (ValNodePtr vnp);
2261
/* Stream-based read of descriptors and the matching write-back with
 * replacements; both take the same is_binary / is_batch / is_submit flags.
 * StreamAsnForDescriptors also reports a Seq-id list through sip_list. */
2262 NLM_EXTERN ValNodePtr StreamAsnForDescriptors (FILE *fp, Boolean is_binary, Boolean is_batch, Boolean is_submit, SeqIdPtr PNTR sip_list);
2263 NLM_EXTERN void WriteAsnWithReplacedDescriptors (ValNodePtr desc_stream_list, FILE *orig_file, FILE *output, Boolean is_binary, Boolean is_batch, Boolean is_submit);
2264 NLM_EXTERN Boolean IdListsMatch (SeqIdPtr sip_list, ValNodePtr all_sip);
2265 NLM_EXTERN void SetOnAllValsForDescStreamList (ValNodePtr desc_list, ValNodePtr all_sip);
2266 
/* Takes a feature, a value string and an offset.
 * NOTE(review): declared with plain extern rather than NLM_EXTERN - confirm
 * this is intentional. */
2267 extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset);
2268
2269 NLM_EXTERN void CleanupOneSeqFeat (SeqFeatPtr sfp);
2270
/* Returns a Uint1 derived from a taxname and lineage string.
 * NOTE(review): presumably a genetic code id, with some value meaning
 * "no special code" - confirm in the implementation. */
2271 NLM_EXTERN Uint1 GetSpecialPlastidGenCode (
2272   CharPtr taxname,
2273   CharPtr lineage
2274 );
2275
2276
/* Cleanup routines that take a FILE pointer for logging (log_fp).
 * NOTE(review): whether log_fp may be NULL is not visible here. */
2277 NLM_EXTERN Boolean TrimPrimerSeqJunkInSeqEntry (SeqEntryPtr sep, FILE *log_fp);
2278 NLM_EXTERN Boolean FixUsaAndStateAbbreviations (Uint2 entityID, FILE *log_fp);
2279 NLM_EXTERN void AdjustSeqEntryForConsensusSplice (SeqEntryPtr sep);
2280 NLM_EXTERN Boolean AdjustSeqEntryForConsensusSpliceEx (SeqEntryPtr sep, FILE *log_fp, Boolean strict);
2281
/* Receives a pointer to the title string pointer, a flag for the first
 * character's case, and a list of organism names. */
2282 NLM_EXTERN void
2283 FixCapitalizationInTitle
2284 (CharPtr PNTR pTitle,
2285  Boolean      first_is_upper,
2286  ValNodePtr   org_names);
2287
/* NOTE(review): the Int4 result is presumably the number of comments
 * converted - confirm. */
2288 NLM_EXTERN Int4 ConvertCommentsWithSpacesToStructuredCommentsForSeqEntry (SeqEntryPtr sep);
2289
/* Table parsers that read from fp and act on sep; the RNA variant also takes
 * a LogInfoPtr. */
2290 NLM_EXTERN void ParseExtractorResultsTableToFeatures (FILE *fp, SeqEntryPtr sep);
2291 NLM_EXTERN void ParseRNAFeatListTableToFeatures (FILE *fp, SeqEntryPtr sep, LogInfoPtr lip);
2292 
2293 
/* Declared only when OS_MSWIN is defined. Takes a command line string.
 * NOTE(review): the Int4 result is presumably an exit/status code - confirm. */
2294 #ifdef OS_MSWIN
2295 NLM_EXTERN Int4 RunSilent(const char *cmdline);
2296 #endif
2297 
2298 
/* Helpers for Seq-ids carried in ValNodes: name, free, copy and match for a
 * single node; free and copy for a list; and conversions between a SeqId
 * chain and a ValNode list in both directions.
 * NOTE(review): whether the conversions copy or share the underlying ids is
 * not visible here - confirm before freeing either side. */
2299 NLM_EXTERN CharPtr ValNodeSeqIdName (ValNodePtr vnp);
2300 NLM_EXTERN void ValNodeSeqIdFree (ValNodePtr vnp);
2301 NLM_EXTERN ValNodePtr ValNodeSeqIdCopy (ValNodePtr vnp);
2302 NLM_EXTERN Boolean ValNodeSeqIdMatch (ValNodePtr vnp1, ValNodePtr vnp2);
2303 NLM_EXTERN ValNodePtr ValNodeSeqIdListFree (ValNodePtr list);
2304 NLM_EXTERN ValNodePtr ValNodeSeqIdListCopy (ValNodePtr list);
2305 NLM_EXTERN ValNodePtr SeqIdListToValNodeSeqIdList (SeqIdPtr sip_list);
2306 NLM_EXTERN SeqIdPtr ValNodeSeqIdListToSeqIdList (ValNodePtr vnp_list);
2307
/* Takes a mutable string and returns nothing.
 * NOTE(review): presumably lower-cases str in place - confirm. */
2308 NLM_EXTERN void StringToLower (CharPtr str);
2309 
/* Source qualifier fix-ups. FixupCountryQuals returns a ValNode list, while
 * the WithLog variant returns Boolean and writes to log_fp. */
2310 NLM_EXTERN ValNodePtr FixupCountryQuals (SeqEntryPtr sep, Boolean fix_after_colon);
2311 NLM_EXTERN Boolean FixupCountryQualsWithLog (SeqEntryPtr sep, Boolean fix_after_colon, FILE *log_fp);
2312 NLM_EXTERN Boolean FixupMouseStrains (SeqEntryPtr sep, FILE *log_fp);
2313
/* Structured comment prefix/dbname helpers. */
2314 NLM_EXTERN CharPtr StructuredCommentDbnameFromString (CharPtr string);
2315 NLM_EXTERN ValNodePtr GetStructuredCommentPrefixList (void);
2316 NLM_EXTERN void SetStructuredCommentPrefixAndSuffix (UserObjectPtr uop, CharPtr string);
2317
/* Returns a ValNode list selected by five Boolean filter flags.
 * NOTE(review): declared with plain extern rather than NLM_EXTERN. */
2318 extern ValNodePtr GetSourceQualDescListEx (Boolean get_subsrc, Boolean get_orgmod, Boolean get_discouraged, Boolean get_discontinued, Boolean get_subfields);
2319
2320 NLM_EXTERN Boolean RemoveCultureNotes (SeqEntryPtr sep);
2321
/* Returns AuthListPtr directly (compare GetAuthListForPub, which returns
 * AuthListPtr PNTR). */
2322 NLM_EXTERN AuthListPtr GetAuthorListForPub (PubPtr the_pub);
2323
/* FixProductWordCapitalization receives a pointer to the string pointer. */
2324 NLM_EXTERN void FixProductWordCapitalization (CharPtr PNTR pProduct);
2325 NLM_EXTERN Boolean FixSrcQualCaps (SeqEntryPtr sep, Int4 src_qual, FILE *log_fp);
2326 NLM_EXTERN Boolean IsNCBIFileID (SeqIdPtr sip);
2327
/* Organelle predicates: one on a genome value, one on a Bioseq. */
2328 NLM_EXTERN Boolean IsLocationOrganelle (Uint1 genome);
2329 NLM_EXTERN Boolean IsBioseqOrganelle (BioseqPtr bsp);
2330 
/* Feature cross-reference helpers: remove or create a link between two
 * features, and SeqEntry-wide routines that build or complete xrefs. */
2331 NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2);
2332 NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp);
2333 NLM_EXTERN void MakeFeatureXrefsFromProteinIdQuals (SeqEntryPtr sep);
2334 NLM_EXTERN void MakeFeatureXrefsFromTranscriptIdQuals (SeqEntryPtr sep);
2335 NLM_EXTERN void FinishHalfXrefs (SeqEntryPtr sep);
/* tRNA helpers; the SeqEntry-wide ones take a LogInfoPtr. GetCodesFortRNA
 * writes an Int2 through pCode in addition to its CharPtr result. */
2336 NLM_EXTERN void FlipCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
2337 NLM_EXTERN void RemoveBadCodonRecognizedInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
2338 NLM_EXTERN Uint1 GetAaFromtRNA (tRNAPtr trp);
2339 NLM_EXTERN CharPtr GetCodesFortRNA (SeqFeatPtr sfp, Int2 *pCode);
2340 
/* Alignment flipping helpers. Two of them take an opaque userdata Pointer.
 * NOTE(review): the (object, Pointer userdata) shape suggests use as visit
 * callbacks; what userdata must point to is not visible here - confirm. */
2341 NLM_EXTERN void ReverseBioseqInAlignment (SeqAlignPtr salp, Pointer userdata);
2342 NLM_EXTERN void FlipAlignment (SeqAlignPtr salp);
2343 NLM_EXTERN void FlipEntireAlignmentIfAllSequencesFlipped (SeqAnnotPtr sap, Pointer userdata);
2344 NLM_EXTERN ValNodePtr ListSequencesWithAlignments (ValNodePtr bsp_list);
/* Callback type applied to a Bioseq, using the LIBCALL calling convention;
 * passed to RevCompBioseqList as func. */
2345 typedef Boolean (LIBCALL *BioseqFunc) (BioseqPtr);
2346 NLM_EXTERN void RevCompBioseqList (ValNodePtr bsp_list,
2347                                    Uint2 entityID,
2348                                    BioseqFunc func,
2349                                    Boolean revCompFeats,
2350                                    Boolean check_for_aln);
/* Alignment membership tests and removal, for a Bioseq or for the elements
 * of a Bioseq-set, within the entity identified by input_entityID. */
2351 NLM_EXTERN Boolean IsBioseqInAnyAlignment (BioseqPtr bsp, Uint2 input_entityID);
2352 NLM_EXTERN Boolean AreAnyElementsOfSetInAnyAlignment (BioseqSetPtr bssp, Uint2 input_entityID);
2353 NLM_EXTERN void RemoveAlignmentsWithSequence (BioseqPtr bsp, Uint2 input_entityID);
2354 NLM_EXTERN void RemoveAlignmentsWithElementsOfSet (BioseqSetPtr bssp, Uint2 input_entityID);
2355 
/* Sequence update helpers.
 * AlignForSequenceUpdate takes two Bioseqs, an output flag revcomp and the
 * alignment function to use (align_func), and returns a SeqAlignPtr.
 * RemoveQualityScores writes to log_fp and reports through data_in_log.
 * NOTE(review): ownership of returned alignments/lists is not visible here. */
2356 NLM_EXTERN void ReplaceComplexLocation (SeqLocPtr slp, SeqAlignPtr salp, Int4 new_len, Int4 begin, Int4 fin);
2357 NLM_EXTERN void ReplaceOneSequence (SeqAlignPtr salp, BioseqPtr oldbsp, BioseqPtr newbsp);
2358 NLM_EXTERN Boolean AreSequenceResiduesIdentical (BioseqPtr bsp1, BioseqPtr bsp2);
2359 NLM_EXTERN SeqAlignPtr AlignForSequenceUpdate (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp, GlobalAlignFunc align_func);
2360 NLM_EXTERN void AddCitSubToUpdatedSequence (BioseqPtr upd_bsp, Uint2 input_entityID, CharPtr update_txt);
2361 NLM_EXTERN ValNodePtr CreateUpdateCitSubFromBestTemplate (SeqEntryPtr top_sep, SeqEntryPtr upd_sep, CharPtr update_txt);
2362 NLM_EXTERN void RemoveQualityScores (BioseqPtr bsp, FILE *log_fp, BoolPtr data_in_log);
2363 NLM_EXTERN void ReplaceFakeIDWithIDFromTitle (BioseqPtr bsp);
2364 
/* Callback type taking two Int4 values and an opaque Pointer.
 * NOTE(review): the meaning of the two Int4 arguments (e.g. progress
 * counters) is not visible here - confirm in the implementation. */
2365 typedef  void  (*Nlm_ImportSeqCallbackProc) PROTO ((Int4, Int4, Pointer));
2366
/* FASTA import from fp. Errors are appended through err_msg_list and
 * chars_stripped is an output flag. The Ex variant additionally takes a
 * callback and its callback_data pointer. */
2367 NLM_EXTERN SeqEntryPtr
2368 ImportNucleotideFASTASequencesFromFileEx
2369 (FILE           *fp,
2370  Boolean         parse_id,
2371  CharPtr         supplied_id_txt,
2372  ValNodePtr PNTR err_msg_list,
2373  BoolPtr         chars_stripped,
2374  Boolean         allow_char_stripping,
2375  Nlm_ImportSeqCallbackProc callback,
2376  Pointer         callback_data);
2377
/* Same parameter list as the Ex variant without callback/callback_data. */
2378 NLM_EXTERN SeqEntryPtr
2379 ImportNucleotideFASTASequencesFromFile
2380 (FILE           *fp,
2381  Boolean         parse_id,
2382  CharPtr         supplied_id_txt,
2383  ValNodePtr PNTR err_msg_list,
2384  BoolPtr         chars_stripped,
2385  Boolean         allow_char_stripping);
/* Protein counterpart; has no allow_char_stripping parameter. */
2386 NLM_EXTERN SeqEntryPtr ImportProteinFASTASequences
2387 (FILE            *fp,
2388  Boolean         parse_id,
2389  CharPtr         supplied_id_txt,
2390  ValNodePtr PNTR err_msg_list,
2391  BoolPtr         chars_stripped);
/* Helpers for matching update sequences against originals, working on
 * ValNode lists of Bioseqs.
 * NOTE(review): whether n/nth in the two Nth helpers is 0- or 1-based is
 * not visible here - confirm before use. */
2392 NLM_EXTERN void AddUniqueUpdateSequenceIDs (SeqEntryPtr sep);
2393 NLM_EXTERN void ListBioseqsInSeqEntry (SeqEntryPtr sep, Boolean is_na, Int4Ptr seq_num, ValNodePtr PNTR bioseq_list);
2394 NLM_EXTERN ValNodePtr ShuffleUpdateBioseqList (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list);
2395 NLM_EXTERN ValNodePtr GetNthValNode (ValNodePtr list, Int4 n);
2396 NLM_EXTERN ValNodePtr ExtractNthValNode (ValNodePtr PNTR list, Int4 nth);
2397 NLM_EXTERN BioseqPtr FindBioseqInList (ValNodePtr bioseq_list, SeqIdPtr sip, Int4Ptr position);
2398 NLM_EXTERN void ReplaceCollidingUpdateIDs (ValNodePtr update_bioseq_list, ValNodePtr orig_bioseq_list);
2399 NLM_EXTERN Boolean RelaxedSeqIdIn (SeqIdPtr sip, SeqIdPtr sip_list);
2400 NLM_EXTERN void RemoveSequencesWithoutUpdates (ValNodePtr PNTR orig_bioseq_list, ValNodePtr PNTR update_bioseq_list);
/* String variable defined in another translation unit. */
2401 extern CharPtr kSubmitterUpdateText;
2402 
/* Returns a SeqLocPtr derived from a feature location (floc), an entityID
 * and a trans_spliced flag.
 * NOTE(review): ownership of the returned location is not visible here. */
2403 NLM_EXTERN SeqLocPtr MakeGeneLocForFeatureLoc (SeqLocPtr floc, Uint2 entityID, Boolean trans_spliced);
2404 
/* Node for local-ID bookkeeping; it carries two self-referential links
 * (left, right). FreeLclTree takes a pointer to the head pointer.
 * NOTE(review): presumably a binary tree ordered by key - confirm ordering
 * and ownership of key in the implementation. */
2405 typedef struct lclidlist {
2406   BioseqPtr  firstbsp;
2407   SeqIdPtr   firstsip;
2408   CharPtr    key;
2409   Int2       count;
2410   struct lclidlist PNTR left;
2411   struct lclidlist PNTR right;
2412 } LclIdList, PNTR LclIdListPtr;
2413 
/* ResolveExistingIDsCallback has the (sep, mydata, index, indent) parameter
 * list; FreeLclTree takes a pointer to the tree head pointer. */
2414 NLM_EXTERN void ResolveExistingIDsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent);
2415 NLM_EXTERN void FreeLclTree (LclIdListPtr PNTR head);
2416 NLM_EXTERN Boolean HasAlignmentsWithLocalIDs (SeqEntryPtr sep);
/* Phrase search controlled by case_sensitive and whole_word flags. */
2417 NLM_EXTERN Boolean DoesStringContainPhrase (CharPtr str, CharPtr phrase, Boolean case_sensitive, Boolean whole_word);
2418
/* EC number maintenance. The Ex variants additionally take head/tail
 * pointers to a ValNode list and a justwarn flag.
 * NOTE(review): the Int4 results are presumably counts of affected EC
 * numbers - confirm. */
2419 NLM_EXTERN Int4 UpdateReplacedECNumbers (SeqEntryPtr sep);
2420 NLM_EXTERN Int4 UpdateReplacedECNumbersEx (SeqEntryPtr sep, ValNodePtr PNTR head, ValNodePtr PNTR tail, Boolean only_unambig, Boolean justwarn);
2421 NLM_EXTERN Int4 DeleteBadECNumbers (SeqEntryPtr sep);
2422 NLM_EXTERN Int4 DeleteBadECNumbersEx (SeqEntryPtr sep, ValNodePtr PNTR head, ValNodePtr PNTR tail, Boolean justwarn);
2423
/* Further set segregation routines operating on a SeqEntry. */
2424 NLM_EXTERN void SegregateSetsByPlantGroup (SeqEntryPtr sep);
2425 NLM_EXTERN void SegregateSetsByFungusGroup (SeqEntryPtr sep);
2426 NLM_EXTERN ValNodePtr PrepareSequenceListForSegregateByBioseqList (SeqEntryPtr sep, ValNodePtr bsp_list);
2427 NLM_EXTERN void SegregateSetsByBioseqList (SeqEntryPtr sep, ValNodePtr vnp);
2428 NLM_EXTERN Boolean SeqEntryHasPairwiseAlignments (SeqEntryPtr sep);
/* Comparison function with the (VoidPtr, VoidPtr) -> int LIBCALLBACK shape.
 * NOTE(review): presumably intended for ValNodeSort/qsort-style sorting. */
2429 NLM_EXTERN int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2);
/* NOTE(review): the int result is presumably a three-way comparison value. */
2430 NLM_EXTERN int CompareSequences (BioseqPtr bsp1, BioseqPtr bsp2, Boolean allow_Ndiff);
2431
2432 NLM_EXTERN Int2 GetGenCodeForBsp (BioseqPtr bsp);
2433 
2434 /* for unverified user objects */
/* Enumerators are sequential from 0, so eUnverifiedType_Max has the value 3,
 * which equals the number of real types listed before it. */
2435 typedef enum unverifiedtype {
2436   eUnverifiedType_Organism  = 0,
2437   eUnverifiedType_Features ,
2438   eUnverifiedType_Misassembled ,
2439   eUnverifiedType_Max
2440 } UnverifiedMatchType;
/* Takes the type as Int4 rather than as UnverifiedMatchType.
 * NOTE(review): behavior for out-of-range values is not visible here. */
2441 NLM_EXTERN CharPtr GetUnverifiedMatchName (Int4 unverified_type);
2442 
/* Replicon accessors on a BioSource.
 * NOTE(review): ownership of the returned strings is not visible here. */
2443 NLM_EXTERN CharPtr GetRepliconChromosomeName (BioSourcePtr biop);
2444 NLM_EXTERN CharPtr GetRepliconType (BioSourcePtr biop);
2445 NLM_EXTERN CharPtr GetRepliconLocation (BioSourcePtr biop);
2446
/* Reads from fp and returns a PubPtr. */
2447 NLM_EXTERN PubPtr ParsePubFromEndnote (FILE *fp);
/* Two variants: one selected by an include_subsource flag, one by a list. */
2448 NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiers (BioseqPtr bsp, Boolean include_subsource);
2449 NLM_EXTERN CharPtr GetDefinitionLineFASTAModifiersByList (BioseqPtr bsp, ValNodePtr list);
2450 
2451 /* for finding frameshifts */
/* Report categories with explicit values 0 through 3. */
2452 typedef enum {
2453   eFrameShiftReport_NoReport = 0,
2454   eFrameShiftReport_Intron = 1,
2455   eFrameShiftReport_Exon = 2,
2456   eFrameShiftReport_ExonMult3 = 3
2457 } EFrameShiftReport;
2458 
/* One frameshift report entry: a message string plus two Int4 values.
 * NOTE(review): coordinate base of aln_pos and the indexing of
 * first_related_seq are not visible here - confirm. */
2459 typedef struct frameshiftreport {
2460   CharPtr msg;
2461   Int4    aln_pos;
2462   Int4    first_related_seq;
2463 } FrameShiftReportData, PNTR FrameShiftReportPtr;
2464 
/* Frameshift report list routines: free, print (to a LogInfoPtr) and find.
 * FindFrameShiftsInAlignment reports through has_exons, which
 * PrintFrameShiftReportList accepts as an input flag. */
2465 NLM_EXTERN ValNodePtr FrameShiftReportListFree (ValNodePtr vnp);
2466 NLM_EXTERN void PrintFrameShiftReportList (ValNodePtr list, Boolean has_exons, Boolean print_exons_only, LogInfoPtr lip);
2467 NLM_EXTERN ValNodePtr FindFrameShiftsInAlignment (SeqAlignPtr salp, BoolPtr has_exons);
2468 NLM_EXTERN Boolean PropagateMissingOldNames (ValNodePtr sep_list);
/* Describe* functions compare two objects and return a string.
 * NOTE(review): the result for identical inputs (NULL vs empty) and its
 * ownership are not visible here - confirm. */
2469 NLM_EXTERN CharPtr DescribeBioSourceDifferences (BioSourcePtr biop1, BioSourcePtr biop2);
2470 NLM_EXTERN CharPtr DescribeStructuredCommentDifferences (UserObjectPtr uop1, UserObjectPtr uop2);
2471 NLM_EXTERN Boolean RemoveDuplicateStructuredCommentsInSeqEntry (SeqEntryPtr sep);
2472 NLM_EXTERN ValNodePtr GetSUCCommonList (SeqEntryPtr sep, Boolean reverse, Boolean byblock, Boolean showsequence, Boolean byqual);
2473
/* Publication lookup; both take a LogInfoPtr. */
2474 NLM_EXTERN ValNodePtr LookupArticlesWithEutils (ValNodePtr orig_pub, LogInfoPtr lip);
2475 NLM_EXTERN Int4 LookupPubsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
2476
2477 NLM_EXTERN void LogTrimmedLocation (LogInfoPtr lip, SeqLocPtr slp);
2478
2479 NLM_EXTERN void AddListOutputTags(ValNodePtr discrepancy_list, DiscReportOutputConfigPtr oc);
2480 NLM_EXTERN Boolean IsMrnaSequence (BioseqPtr bsp);
/* Receives a pointer to the id string pointer, so it is able to modify the
 * caller's string pointer. */
2481 NLM_EXTERN BioseqPtr BioseqFromAlignmentID (CharPtr PNTR id_str);
2482
2483 NLM_EXTERN Int4 TrimNsFromNucsInSeqEntry (SeqEntryPtr sep, LogInfoPtr lip);
2484 NLM_EXTERN void CorrectGenCodes (SeqEntryPtr sep, Uint2 entityID);
/* NOTE(review): the int result is presumably a three-way comparison value. */
2485 NLM_EXTERN int CompareUserFields (UserFieldPtr ufp1, UserFieldPtr ufp2);
2486
2487 NLM_EXTERN void RemoveEmptyStructuredComments (Uint2 entityID);
2488
/* Prefix/suffix predicates on a user field, and a prefix accessor on a
 * user object. */
2489 NLM_EXTERN Boolean IsStructuredCommentPrefix (UserFieldPtr ufp);
2490 NLM_EXTERN Boolean IsStructuredCommentSuffix (UserFieldPtr ufp);
2491 NLM_EXTERN CharPtr GetStructuredCommentPrefix (UserObjectPtr uop);
2492 
2493 
/* One field difference: the field, two identifier strings, the two values
 * being compared, and a src ValNode.
 * NOTE(review): src presumably carries the src_type/src_data pair passed to
 * the Get*FieldDiffs functions - confirm. */
2494 typedef struct fielddiff {
2495   ValNodePtr field;
2496   CharPtr seq_id;
2497   CharPtr biosample_id;
2498   CharPtr val1;
2499   CharPtr val2;
2500   ValNodePtr src;
2501 } FieldDiffData, PNTR FieldDiffPtr;
2502 
/* FieldDiff lifecycle and producers. The two Get*FieldDiffs functions share
 * the same parameter layout apart from the pair of objects compared
 * (BioSources vs user objects). */
2503 NLM_EXTERN FieldDiffPtr FieldDiffFree (FieldDiffPtr diff);
2504 NLM_EXTERN ValNodePtr LIBCALL FieldDiffListFree (ValNodePtr list);
2505 NLM_EXTERN ValNodePtr GetBioSourceFieldDiffs (CharPtr seq_id, CharPtr biosample_id, BioSourcePtr biop1, BioSourcePtr biop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data);
2506 NLM_EXTERN ValNodePtr GetStructuredCommentFieldDiffs (CharPtr seq_id, CharPtr biosample_id, UserObjectPtr uop1, UserObjectPtr uop2, ValNodePtr field_list, Uint1 src_type, Pointer src_data);
/* Comparison functions with the (VoidPtr, VoidPtr) -> int LIBCALLBACK shape.
 * NOTE(review): presumably for sorting ValNode lists of FieldDiff entries. */
2507 NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffField (VoidPtr ptr1, VoidPtr ptr2);
2508 NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffBioIdThenField (VoidPtr ptr1, VoidPtr ptr2);
2509 NLM_EXTERN int LIBCALLBACK SortVnpByFieldDiffBiosampleIdThenFieldThenVal (VoidPtr ptr1, VoidPtr ptr2);
2510
/* FindFlankingGenes reports two features through firstP and lastP. */
2511 NLM_EXTERN Boolean FindFlankingGenes (SeqLocPtr location, SeqFeatPtr PNTR firstP, SeqFeatPtr PNTR lastP);
2512 NLM_EXTERN void AssignGeneXrefToFeat (SeqFeatPtr sfp, SeqFeatPtr gene);
2513 
2514 
2515 /* for cleanup of BioSources */
/* The two ConsolidateOneLike* functions take the modifier to match against
 * (match_to) and a use_semicolon flag.
 * NOTE(review): presumably use_semicolon selects the separator used when
 * values are merged - confirm. */
2516 NLM_EXTERN void ConsolidateBioSourceNotes (BioSourcePtr biop);
2517 NLM_EXTERN void ConsolidateOneLikeOrganismModifier (OrgModPtr match_to, Boolean use_semicolon);
2518 NLM_EXTERN void ConsolidateOneLikeSubSourceModifier (SubSourcePtr match_to, Boolean use_semicolon);
2519
/* String constant; where it is matched or stored is not visible here. */
2520 #define kAllowManualGenCodeException "genetic code exception"
2521
2522
/* SeqEntry-wide fix-ups that take a FILE pointer for logging (log_fp). */
2523 NLM_EXTERN Boolean ReplaceStopsWithSelenocysteineInSeqEntry (SeqEntryPtr sep, FILE *log_fp);
2524 NLM_EXTERN Boolean JoinShortTrnas (SeqEntryPtr sep, FILE *log_fp);
2525
/* Predicate on a user object. */
2526 NLM_EXTERN Boolean IsDBLinkObject (UserObjectPtr uop);
2527 
/* Description of one gap: start and length as Int4, three string
 * attributes, and an unknown_length flag.
 * NOTE(review): the coordinate base of start and the ownership of the
 * strings are not visible here - confirm in GapLocFromSeqFeat. */
2528 typedef struct gaplocdata {
2529   Int4     start;
2530   Int4     length;
2531   CharPtr  estimated_length;
2532   CharPtr  gap_type;
2533   CharPtr  linkage_evidence;
2534   Boolean  unknown_length;
2535 } GapLocData, PNTR GapLocPtr;
2536 
2537 
/* Gap feature / delta sequence routines.
 * NOTE(review): these seven prototypes carry no NLM_EXTERN, unlike most of
 * this header - confirm whether that is intentional.
 * The two BioseqToDelta* functions take an opaque userdata Pointer; what it
 * must point to is not visible here. */
2538 void PopulateGapLocQuals(GapLocPtr glp, SeqFeatPtr sfp, Int4 left, Int4 len);
2539 GapLocPtr GapLocFromSeqFeat(SeqFeatPtr sfp, Int4 left);
2540 Boolean IncompatibleGapFeatQuals (SeqFeatPtr sfp);
2541 void BioseqToDeltaByGapFeat (BioseqPtr bsp, Pointer userdata);
2542 void BioseqToDeltaMergeGapFeat (BioseqPtr bsp, Pointer userdata);
2543 Boolean DeltaLitOnly (BioseqPtr bsp);
2544 Boolean MergeAssemblyGapFeats (BioseqPtr bsp);
2545
/* Predicate on a Uint1 key. */
2546 NLM_EXTERN Boolean IsRegulatorySubtype (Uint1 key);
2547 
2548 
2549 #ifdef __cplusplus
2550 }
2551 #endif
2552 
2553 #undef NLM_EXTERN
2554 #ifdef NLM_EXPORT
2555 #define NLM_EXTERN NLM_EXPORT
2556 #else
2557 #define NLM_EXTERN
2558 #endif
2559 
2560 #endif /* ndef _SQNUTILS_ */
2561 
2562