1 /*   macro.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  macro.c
27 *
28 * Author:  Colleen Bollin
29 *
30 * Version Creation Date:   11/8/2007
31 *
32 * $Revision: 1.598 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date     Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 
45 #include <asn.h>
46 #include <objfeat.h>
47 #include <subutil.h>
48 #include <objmgr.h>
49 #include <objfdef.h>
50 #include <gbftdef.h>
51 #include <sqnutils.h>
52 #include <edutil.h>
53 #include <gather.h>
54 #include <ffprint.h>
55 #include <asn2gnbi.h>
56 #include <findrepl.h>
57 #include <utilpub.h>
58 #define NLM_GENERATED_CODE_PROTO
59 #include <objmacro.h>
60 #include <macroapi.h>
61 #include <seqport.h>
62 #include <parsegb.h>
63 #include <salutil.h>
64 #include <valid.h>
65 #include <objvalid.h>
66 #include <valapi.h>
67 #include <tax3api.h>
68 #include <tofasta.h>
69 
70 /* static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data); */
71 static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
72 static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
73 static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list);
74 
/* Bioseq visitor callback (used with VisitBioseqsInSep).
 * userdata is expected to be a ValNodeBlockPtr; every Bioseq whose mol
 * value passes ISA_na is appended to that block as an OBJ_BIOSEQ entry.
 * A NULL Bioseq or NULL userdata is silently ignored.
 */
static void GetNucBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  ValNodeBlockPtr  block;

  if (bsp == NULL || ! ISA_na (bsp->mol)) {
    return;
  }

  block = (ValNodeBlockPtr) userdata;
  if (block != NULL) {
    ValNodeAddPointerEx (&(block->head), &(block->tail), OBJ_BIOSEQ, bsp);
  }
}
87 
CollectNucBioseqs(SeqEntryPtr sep)88 static ValNodePtr CollectNucBioseqs (SeqEntryPtr sep)
89 
90 {
91   ValNodeBlock  vnb;
92 
93   if (sep == NULL) return NULL;
94 
95   vnb.head = NULL;
96   vnb.tail = NULL;
97 
98   VisitBioseqsInSep (sep, &vnb, GetNucBioseqCallback);
99 
100   return vnb.head;
101 }
102 
IsAllCaps(CharPtr str)103 static Boolean IsAllCaps (CharPtr str)
104 {
105   CharPtr cp;
106 
107   if (StringHasNoText (str)) return FALSE;
108 
109   cp = str;
110   while (*cp != 0) {
111     if (isalpha (*cp)) {
112       if (islower (*cp)) {
113         return FALSE;
114       }
115     }
116     cp++;
117   }
118   return TRUE;
119 }
120 
121 
IsAllLowerCase(CharPtr str)122 static Boolean IsAllLowerCase (CharPtr str)
123 {
124   CharPtr cp;
125 
126   if (StringHasNoText (str)) return FALSE;
127 
128   cp = str;
129   while (*cp != 0) {
130     if (isalpha (*cp)) {
131       if (isupper (*cp)) {
132         return FALSE;
133       }
134     }
135     cp++;
136   }
137   return TRUE;
138 }
139 
140 
IsAllPunctuation(CharPtr str)141 static Boolean IsAllPunctuation (CharPtr str)
142 {
143   CharPtr cp;
144 
145   if (StringHasNoText (str)) return FALSE;
146 
147   cp = str;
148   while (*cp != 0) {
149     if (!ispunct (*cp)) {
150       return FALSE;
151     }
152     cp++;
153   }
154   return TRUE;
155 }
156 
157 
PrintPartialOrCompleteDate(DatePtr date)158 static CharPtr PrintPartialOrCompleteDate(DatePtr date)
159 {
160   CharPtr str = NULL;
161     Char year[5];
162     Char result[15];
163 
164   if (date == NULL) {
165     return NULL;
166   }
167   str = PrintDate(date);
168   if (str == NULL && date->data[0] > 0 && date->data[1]) {
169         if ((int) (date -> data[1]) < 30) {
170             sprintf(year, "%4d", (int) (date -> data[1] + 2000));
171         } else {
172             sprintf(year, "%4d", (int) (date -> data[1] + 1900));
173         }
174     if (date->data[2]) {
175       sprintf(result, "%s %s", NCBI_months[date->data[2] -1 ], year);
176     } else {
177       StringCpy (result, year);
178     }
179     str = StringSave (result);
180   }
181   return str;
182 }
183 
184 
185 static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt);
186 static Int4 CompareRnaTypes (RnaFeatTypePtr rt1, RnaFeatTypePtr rt2);
187 
188 /* NOTES */
189 /* When adding a new field type, add implementation to the following functions:
190  * GetFromFieldFromFieldPair
191  * GetToFieldFromFieldPair
192  * BuildFieldPairFromFromField
193  * FieldTypeChoiceFromFieldPairTypeChoice
194  * CompareFieldTypes
195  * IsObjectAppropriateForFieldValue
196  * GetFieldValueForObject
197  * RemoveFieldValueForObject
198  * SetFieldValueForObject
199  * SortFieldsForObject
200  * GetObjectListForFieldType
201  * GetFieldListForFieldType
202  * IsFieldTypeEmpty
203  * AllowFieldMulti
204  * SummarizeFieldType
205  * GetTargetListForRowAndColumn
206  * ReportMissingTargets
207  * CountObjectsForColumnFields
208  */
209 
210 
FeatureFieldCopy(FeatureFieldPtr orig)211 NLM_EXTERN FeatureFieldPtr FeatureFieldCopy (FeatureFieldPtr orig)
212 {
213   FeatureFieldPtr ff = NULL;
214 
215   if (orig != NULL) {
216     ff = FeatureFieldNew();
217     ff->type = orig->type;
218     if (orig->field != NULL) {
219       ff->field = AsnIoMemCopy (orig->field, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
220     }
221   }
222   return ff;
223 }
224 
225 
FieldTypeCopy(FieldTypePtr orig)226 NLM_EXTERN FieldTypePtr FieldTypeCopy (FieldTypePtr orig)
227 {
228   FieldTypePtr ft = NULL;
229   RnaQualPtr   rq, rq_orig;
230 
231   if (orig != NULL) {
232     if (orig->data.ptrvalue == NULL) {
233       ft = ValNodeNew (NULL);
234       ft->choice = orig->choice;
235     } else if (orig->choice == FieldType_feature_field) {
236       ft = ValNodeNew (NULL);
237       ft->choice = FieldType_feature_field;
238       ft->data.ptrvalue = FeatureFieldCopy (orig->data.ptrvalue);
239     } else if (orig->choice == FieldType_rna_field) {
240       ft = ValNodeNew (NULL);
241       ft->choice = FieldType_rna_field;
242       rq_orig = (RnaQualPtr) orig->data.ptrvalue;
243       rq = RnaQualNew ();
244       rq->field = rq_orig->field;
245       rq->type = AsnIoMemCopy (rq_orig->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
246       ft->data.ptrvalue = rq;
247     } else {
248       ft = AsnIoMemCopy (orig, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite);
249     }
250   }
251   return ft;
252 }
253 
254 
255 /* Functions for handling FieldPairs */
GetFromFieldFromFieldPair(FieldPairTypePtr fieldpair)256 NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair)
257 {
258   SourceQualChoicePtr ss = NULL;
259   SourceQualPairPtr sqpp;
260   FeatureFieldPairPtr fp;
261   FeatureFieldPtr fs;
262   RnaQualPairPtr rqp;
263   RnaQualPtr rq;
264   FieldTypePtr f = NULL;
265   CDSGeneProtFieldPairPtr cp;
266   MolinfoFieldPairPtr mp;
267   StructuredCommentFieldPairPtr scfp;
268   DBLinkFieldPairPtr dbfp;
269   ValNodePtr vnp;
270 
271   if (fieldpair == NULL) return NULL;
272   switch (fieldpair->choice) {
273     case FieldPairType_source_qual:
274       sqpp = (SourceQualPairPtr) fieldpair->data.ptrvalue;
275       if (sqpp != NULL) {
276         ss = ValNodeNew (NULL);
277         ss->choice = SourceQualChoice_textqual;
278         ss->data.intvalue = sqpp->field_from;
279         f = ValNodeNew (NULL);
280         f->choice = FieldType_source_qual;
281         f->data.ptrvalue = ss;
282       }
283       break;
284     case FieldPairType_feature_field:
285       fp = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
286       if (fp != NULL) {
287         fs = FeatureFieldNew ();
288         fs->type = fp->type;
289         fs->field = (FeatQualChoicePtr) AsnIoMemCopy (fp->field_from, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
290         f = ValNodeNew (NULL);
291         f->choice = FieldType_feature_field;
292         f->data.ptrvalue = fs;
293       }
294       break;
295     case FieldPairType_rna_field:
296       rqp = (RnaQualPairPtr) fieldpair->data.ptrvalue;
297       if (rqp != NULL) {
298         rq = RnaQualNew ();
299         if (rqp->type != NULL) {
300           rq->type = AsnIoMemCopy (rqp->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
301         }
302         rq->field = rqp->field_from;
303         f = ValNodeNew (NULL);
304         f->choice = FieldType_rna_field;
305         f->data.ptrvalue = rq;
306       }
307       break;
308     case FieldPairType_cds_gene_prot:
309       cp = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
310       if (cp != NULL) {
311         f = ValNodeNew (NULL);
312         f->choice = FieldType_cds_gene_prot;
313         f->data.intvalue = cp->field_from;
314       }
315       break;
316     case FieldPairType_molinfo_field:
317       mp = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
318       if (mp != NULL && mp->data.ptrvalue != NULL) {
319         vnp = NULL;
320         switch (mp->choice) {
321           case MolinfoFieldPair_molecule:
322             vnp = ValNodeNew (NULL);
323             vnp->choice = MolinfoField_molecule;
324             vnp->data.intvalue = ((MolinfoMoleculePairPtr)mp->data.ptrvalue)->from;
325             break;
326           case MolinfoFieldPair_technique:
327             vnp = ValNodeNew (NULL);
328             vnp->choice = MolinfoField_technique;
329             vnp->data.intvalue = ((MolinfoTechniquePairPtr)mp->data.ptrvalue)->from;
330             break;
331           case MolinfoFieldPair_completedness:
332             vnp = ValNodeNew (NULL);
333             vnp->choice = MolinfoField_completedness;
334             vnp->data.intvalue = ((MolinfoCompletednessPairPtr)mp->data.ptrvalue)->from;
335             break;
336           case MolinfoFieldPair_mol_class:
337             vnp = ValNodeNew (NULL);
338             vnp->choice = MolinfoField_mol_class;
339             vnp->data.intvalue = ((MolinfoMolClassPairPtr)mp->data.ptrvalue)->from;
340             break;
341           case MolinfoFieldPair_topology:
342             vnp = ValNodeNew (NULL);
343             vnp->choice = MolinfoField_topology;
344             vnp->data.intvalue = ((MolinfoTopologyPairPtr)mp->data.ptrvalue)->from;
345             break;
346           case MolinfoFieldPair_strand:
347             vnp = ValNodeNew (NULL);
348             vnp->choice = MolinfoField_strand;
349             vnp->data.intvalue = ((MolinfoStrandPairPtr)mp->data.ptrvalue)->from;
350             break;
351         }
352         if (vnp != NULL) {
353           f = ValNodeNew (NULL);
354           f->choice = FieldType_molinfo_field;
355           f->data.ptrvalue = vnp;
356         }
357       }
358       break;
359     case FieldPairType_struc_comment_field:
360       scfp = (StructuredCommentFieldPairPtr) fieldpair->data.ptrvalue;
361       if (scfp != NULL) {
362         f = ValNodeNew (NULL);
363         f->choice = FieldType_struc_comment_field;
364         f->data.ptrvalue = AsnIoMemCopy (scfp->from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
365       }
366       break;
367     case FieldPairType_dblink:
368       dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue;
369       if (dbfp != NULL) {
370         f = ValNodeNew (NULL);
371         f->choice = FieldType_dblink;
372         f->data.intvalue = dbfp->from;
373       }
374       break;
375   }
376   return f;
377 }
378 
379 
GetToFieldFromFieldPair(FieldPairTypePtr fieldpair)380 NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair)
381 {
382   SourceQualChoicePtr ss = NULL;
383   SourceQualPairPtr sqpp;
384   FeatureFieldPairPtr fp;
385   FeatureFieldPtr fs;
386   FieldTypePtr f = NULL;
387   RnaQualPairPtr   rqp;
388   RnaQualPtr       rq;
389   CDSGeneProtFieldPairPtr cp;
390   MolinfoFieldPairPtr     mp;
391   StructuredCommentFieldPairPtr scfp;
392   DBLinkFieldPairPtr dbfp;
393   ValNodePtr              vnp;
394 
395   if (fieldpair == NULL) return NULL;
396   switch (fieldpair->choice) {
397     case FieldPairType_source_qual:
398       sqpp = (SourceQualPairPtr) fieldpair->data.ptrvalue;
399       if (sqpp != NULL) {
400         ss = ValNodeNew (NULL);
401         ss->choice = SourceQualChoice_textqual;
402         ss->data.intvalue = sqpp->field_to;
403         f = ValNodeNew (NULL);
404         f->choice = FieldType_source_qual;
405         f->data.ptrvalue = ss;
406       }
407       break;
408     case FieldPairType_feature_field:
409       fp = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
410       if (fp != NULL) {
411         fs = FeatureFieldNew ();
412         fs->type = fp->type;
413         fs->field = (FeatQualChoicePtr) AsnIoMemCopy (fp->field_to, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
414         f = ValNodeNew (NULL);
415         f->choice = FieldType_feature_field;
416         f->data.ptrvalue = fs;
417       }
418       break;
419     case FieldPairType_rna_field:
420       rqp = (RnaQualPairPtr) fieldpair->data.ptrvalue;
421       if (rqp != NULL) {
422         rq = RnaQualNew ();
423         if (rqp->type != NULL) {
424           rq->type = AsnIoMemCopy (rqp->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
425         }
426         rq->field = rqp->field_to;
427         f = ValNodeNew (NULL);
428         f->choice = FieldType_rna_field;
429         f->data.ptrvalue = rq;
430       }
431       break;
432     case FieldPairType_cds_gene_prot:
433       cp = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
434       if (cp != NULL) {
435         f = ValNodeNew (NULL);
436         f->choice = FieldType_cds_gene_prot;
437         f->data.intvalue = cp->field_to;
438       }
439       break;
440     case FieldPairType_molinfo_field:
441       mp = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
442       if (mp != NULL && mp->data.ptrvalue != NULL) {
443         vnp = NULL;
444         switch (mp->choice) {
445           case MolinfoFieldPair_molecule:
446             vnp = ValNodeNew (NULL);
447             vnp->choice = MolinfoField_molecule;
448             vnp->data.intvalue = ((MolinfoMoleculePairPtr)mp->data.ptrvalue)->to;
449             break;
450           case MolinfoFieldPair_technique:
451             vnp = ValNodeNew (NULL);
452             vnp->choice = MolinfoField_technique;
453             vnp->data.intvalue = ((MolinfoTechniquePairPtr)mp->data.ptrvalue)->to;
454             break;
455           case MolinfoFieldPair_completedness:
456             vnp = ValNodeNew (NULL);
457             vnp->choice = MolinfoField_completedness;
458             vnp->data.intvalue = ((MolinfoCompletednessPairPtr)mp->data.ptrvalue)->to;
459             break;
460           case MolinfoFieldPair_mol_class:
461             vnp = ValNodeNew (NULL);
462             vnp->choice = MolinfoField_mol_class;
463             vnp->data.intvalue = ((MolinfoMolClassPairPtr)mp->data.ptrvalue)->to;
464             break;
465           case MolinfoFieldPair_topology:
466             vnp = ValNodeNew (NULL);
467             vnp->choice = MolinfoField_topology;
468             vnp->data.intvalue = ((MolinfoTopologyPairPtr)mp->data.ptrvalue)->to;
469             break;
470           case MolinfoFieldPair_strand:
471             vnp = ValNodeNew (NULL);
472             vnp->choice = MolinfoField_strand;
473             vnp->data.intvalue = ((MolinfoStrandPairPtr)mp->data.ptrvalue)->to;
474             break;
475         }
476         if (vnp != NULL) {
477           f = ValNodeNew (NULL);
478           f->choice = FieldType_molinfo_field;
479           f->data.ptrvalue = vnp;
480         }
481       }
482       break;
483     case FieldPairType_struc_comment_field:
484       scfp = (StructuredCommentFieldPairPtr) fieldpair->data.ptrvalue;
485       if (scfp != NULL) {
486         f = ValNodeNew (NULL);
487         f->choice = FieldType_struc_comment_field;
488         f->data.ptrvalue = AsnIoMemCopy (scfp->to, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
489       }
490       break;
491     case FieldPairType_dblink:
492       dbfp = (DBLinkFieldPairPtr) fieldpair->data.ptrvalue;
493       if (dbfp != NULL) {
494         f = ValNodeNew (NULL);
495         f->choice = FieldType_dblink;
496         f->data.intvalue = dbfp->to;
497       }
498       break;
499   }
500   return f;
501 }
502 
503 
BuildFieldPairFromFromField(FieldTypePtr field_from)504 NLM_EXTERN FieldPairTypePtr BuildFieldPairFromFromField (FieldTypePtr field_from)
505 {
506   SourceQualChoicePtr ss = NULL;
507   SourceQualPairPtr sqpp;
508   FeatureFieldPairPtr fp;
509   FeatureFieldPtr fs;
510   RnaQualPairPtr   rqp;
511   RnaQualPtr       rq;
512   CDSGeneProtFieldPairPtr cp;
513   StructuredCommentFieldPairPtr scfp;
514   DBLinkFieldPairPtr dbfp;
515   ValNodePtr     mp;
516   MolinfoMoleculePairPtr      mol_p;
517   MolinfoTechniquePairPtr     tech_p;
518   MolinfoCompletednessPairPtr comp_p;
519   MolinfoMolClassPairPtr      class_p;
520   MolinfoTopologyPairPtr      topo_p;
521   MolinfoStrandPairPtr        strand_p;
522   ValNodePtr              vnp;
523   FieldPairTypePtr        pair = NULL;
524 
525   if (field_from == NULL) return NULL;
526   switch (field_from->choice) {
527     case FieldType_source_qual:
528       pair = ValNodeNew (NULL);
529       pair->choice = FieldPairType_source_qual;
530       ss = (SourceQualChoicePtr) field_from->data.ptrvalue;
531       if (ss != NULL && ss->choice == SourceQualChoice_textqual) {
532         sqpp = SourceQualPairNew ();
533         sqpp->field_from = ss->data.intvalue;
534         pair->data.ptrvalue = sqpp;
535       }
536       break;
537     case FieldType_feature_field:
538       pair = ValNodeNew (NULL);
539       pair->choice = FieldPairType_feature_field;
540       fs = (FeatureFieldPtr) field_from->data.ptrvalue;
541       if (fs != NULL) {
542         fp = FeatureFieldPairNew ();
543         fp->type = fs->type;
544         fp->field_from = (FeatQualChoicePtr) AsnIoMemCopy (fs->field, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
545         pair->data.ptrvalue = fp;
546       }
547       break;
548     case FieldType_rna_field:
549       pair = ValNodeNew (NULL);
550       pair->choice = FieldPairType_rna_field;
551       rq = (RnaQualPtr) field_from->data.ptrvalue;
552       if (rq != NULL) {
553         rqp = RnaQualPairNew ();
554         if (rq->type != NULL) {
555           rqp->type = AsnIoMemCopy (rq->type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
556         }
557         rqp->field_from = rq->field;
558         pair->data.ptrvalue = rqp;
559       }
560       break;
561     case FieldType_cds_gene_prot:
562       pair = ValNodeNew (NULL);
563       pair->choice = FieldPairType_cds_gene_prot;
564       cp = CDSGeneProtFieldPairNew ();
565       cp->field_from = field_from->data.intvalue;
566       pair->data.ptrvalue = cp;
567       break;
568     case FieldType_molinfo_field:
569       pair = ValNodeNew (NULL);
570       pair->choice = FieldPairType_molinfo_field;
571       vnp = field_from->data.ptrvalue;
572       if (vnp != NULL) {
573         switch (vnp->choice) {
574           case MolinfoField_molecule:
575             mol_p = MolinfoMoleculePairNew ();
576             mol_p->from = vnp->data.intvalue;
577             mp = ValNodeNew (NULL);
578             mp->choice = MolinfoFieldPair_molecule;
579             mp->data.ptrvalue = mol_p;
580             pair->data.ptrvalue = mp;
581             break;
582           case MolinfoField_technique:
583             tech_p = MolinfoTechniquePairNew ();
584             tech_p->from = vnp->data.intvalue;
585             mp = ValNodeNew (NULL);
586             mp->choice = MolinfoFieldPair_molecule;
587             mp->data.ptrvalue = tech_p;
588             pair->data.ptrvalue = mp;
589             break;
590           case MolinfoField_completedness:
591             comp_p = MolinfoCompletednessPairNew ();
592             comp_p->from = vnp->data.intvalue;
593             mp = ValNodeNew (NULL);
594             mp->choice = MolinfoFieldPair_molecule;
595             mp->data.ptrvalue = comp_p;
596             pair->data.ptrvalue = mp;
597             break;
598           case MolinfoField_mol_class:
599             class_p = MolinfoMolClassPairNew ();
600             class_p->from = vnp->data.intvalue;
601             mp = ValNodeNew (NULL);
602             mp->choice = MolinfoFieldPair_molecule;
603             mp->data.ptrvalue = class_p;
604             pair->data.ptrvalue = mp;
605             break;
606           case MolinfoField_topology:
607             topo_p = MolinfoTopologyPairNew ();
608             topo_p->from = vnp->data.intvalue;
609             mp = ValNodeNew (NULL);
610             mp->choice = MolinfoFieldPair_molecule;
611             mp->data.ptrvalue = topo_p;
612             pair->data.ptrvalue = mp;
613             break;
614           case MolinfoFieldPair_strand:
615             strand_p = MolinfoStrandPairNew ();
616             strand_p->from = vnp->data.intvalue;
617             mp = ValNodeNew (NULL);
618             mp->choice = MolinfoFieldPair_molecule;
619             mp->data.ptrvalue = strand_p;
620             pair->data.ptrvalue = mp;
621             break;
622         }
623       }
624       break;
625     case FieldType_struc_comment_field:
626       pair = ValNodeNew (NULL);
627       pair->choice = FieldPairType_struc_comment_field;
628       scfp = StructuredCommentFieldPairNew ();
629       scfp->from = AsnIoMemCopy (field_from, (AsnReadFunc) StructuredCommentFieldAsnRead, (AsnWriteFunc) StructuredCommentFieldAsnWrite);
630       pair->data.ptrvalue = scfp;
631       break;
632     case FieldType_dblink:
633       pair = ValNodeNew (NULL);
634       pair->choice = FieldPairType_dblink;
635       dbfp = DBLinkFieldPairNew ();
636       dbfp->from = field_from->data.intvalue;
637       pair->data.ptrvalue = dbfp;
638       break;
639   }
640   return pair;
641 }
642 
643 
FieldTypeChoiceFromFieldPairTypeChoice(Uint1 field_pair_choice)644 NLM_EXTERN Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice)
645 {
646   Uint1 field_type_choice = 0;
647 
648   switch (field_pair_choice) {
649     case FieldPairType_source_qual:
650       field_type_choice = FieldType_source_qual;
651       break;
652     case FieldPairType_feature_field:
653       field_type_choice = FieldType_feature_field;
654       break;
655     case FieldPairType_rna_field:
656       field_type_choice = FieldType_rna_field;
657       break;
658     case FieldPairType_cds_gene_prot:
659       field_type_choice = FieldType_cds_gene_prot;
660       break;
661     case FieldPairType_molinfo_field:
662       field_type_choice = FieldType_molinfo_field;
663       break;
664     case FieldPairType_struc_comment_field:
665       field_type_choice = FieldType_struc_comment_field;
666       break;
667     case FieldPairType_dblink:
668       field_type_choice = FieldType_dblink;
669       break;
670   }
671 
672   return field_type_choice;
673 }
674 
675 
676 /* functions for handling single fields */
677 
/* Sort-style comparison of two source qualifier choices.
 * ptr1 and ptr2 each point at a ValNodePtr; returns <0, 0 or >0.
 *
 * Ordering rules, in priority order:
 *   - if either argument pointer is NULL the result is 0;
 *   - a NULL node sorts before a non-NULL node;
 *   - nodes are ordered by choice;
 *   - for text qualifiers: equal values compare equal, taxname sorts
 *     first, taxid second, and everything else is ordered by StringCmp
 *     on the names returned by GetSourceQualName;
 *   - for other choices nodes are ordered by integer value.
 */
static int CompareSourceQuals (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr  left;
  ValNodePtr  right;
  CharPtr     left_name;
  CharPtr     right_name;

  if (ptr1 == NULL || ptr2 == NULL) {
    return 0;
  }

  left = *((ValNodePtr PNTR) ptr1);
  right = *((ValNodePtr PNTR) ptr2);

  if (left == NULL && right == NULL) return 0;
  if (left == NULL) return -1;
  if (right == NULL) return 1;

  if (left->choice > right->choice) return 1;
  if (left->choice < right->choice) return -1;

  if (left->choice != SourceQualChoice_textqual) {
    if (left->data.intvalue > right->data.intvalue) return 1;
    if (left->data.intvalue < right->data.intvalue) return -1;
    return 0;
  }

  /* text qualifiers: taxname and taxid are pinned to the front */
  if (left->data.intvalue == right->data.intvalue) return 0;
  if (left->data.intvalue == Source_qual_taxname) return -1;
  if (right->data.intvalue == Source_qual_taxname) return 1;
  if (left->data.intvalue == Source_qual_taxid) return -1;
  if (right->data.intvalue == Source_qual_taxid) return 1;

  left_name = GetSourceQualName (left->data.intvalue);
  right_name = GetSourceQualName (right->data.intvalue);
  return StringCmp (left_name, right_name);
}
724 
725 static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2);
726 
727 
CompareFieldTypesEx(FieldTypePtr vnp1,FieldTypePtr vnp2,Boolean use_source_qual_sort)728 NLM_EXTERN int CompareFieldTypesEx (FieldTypePtr vnp1, FieldTypePtr vnp2, Boolean use_source_qual_sort)
729 {
730   int rval = 0;
731   FeatureFieldPtr field1, field2;
732   RnaQualPtr rq1, rq2;
733   StructuredCommentFieldPtr scf1, scf2;
734   Int4  v1, v2;
735 
736   if (vnp1 == NULL && vnp2 == NULL) {
737     rval = 0;
738   } else if (vnp1 == NULL) {
739     rval = -1;
740   } else if (vnp2 == NULL) {
741     rval = 1;
742   } else if (vnp1->choice > vnp2->choice) {
743     rval = 1;
744   } else if (vnp1->choice < vnp2->choice) {
745     rval = -1;
746   } else {
747     switch (vnp1->choice) {
748       case FieldType_source_qual:
749         vnp1 = vnp1->data.ptrvalue;
750         vnp2 = vnp2->data.ptrvalue;
751         if (use_source_qual_sort) {
752           rval = CompareSourceQuals(&vnp1, &vnp2);
753         } else {
754           rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2);
755         }
756         break;
757       case FieldType_molinfo_field:
758         vnp1 = vnp1->data.ptrvalue;
759         vnp2 = vnp2->data.ptrvalue;
760         rval = SortVnpByChoiceAndIntvalue (&vnp1, &vnp2);
761         break;
762       case FieldType_feature_field:
763         field1 = (FeatureFieldPtr) vnp1->data.ptrvalue;
764         field2 = (FeatureFieldPtr) vnp2->data.ptrvalue;
765         if (field1 == NULL && field2 == NULL) {
766           rval = 0;
767         } else if (field1 == NULL) {
768           rval = -1;
769         } else if (field2 == NULL) {
770           rval = 1;
771         } else if (field1->type < field2->type) {
772           rval = -1;
773         } else if (field1->type > field2->type) {
774           rval = 1;
775         } else if (field1->field == NULL && field2->field == NULL) {
776           rval = 0;
777         } else if (field1->field == NULL) {
778           rval = -1;
779         } else if (field2->field == NULL) {
780           rval = 1;
781         } else if (field1->field->choice < field2->field->choice) {
782           rval = -1;
783         } else if (field1->field->choice > field2->field->choice) {
784           rval = 1;
785         } else {
786           switch (field1->field->choice) {
787             case FeatQualChoice_legal_qual:
788               if (field1->field->data.intvalue < field2->field->data.intvalue) {
789                 rval = -1;
790               } else if (field1->field->data.intvalue > field2->field->data.intvalue) {
791                 rval = 1;
792               }
793               break;
794             case FeatQualChoice_illegal_qual:
795               rval = 0;
796               break;
797           }
798         }
799         break;
800       case FieldType_cds_gene_prot:
801       case FieldType_pub:
802       case FieldType_misc:
803         if (vnp1->data.intvalue > vnp2->data.intvalue) {
804           rval = 1;
805         } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
806           rval = -1;
807         }
808         break;
809       case FieldType_rna_field:
810         rq1 = (RnaQualPtr) vnp1->data.ptrvalue;
811         rq2 = (RnaQualPtr) vnp2->data.ptrvalue;
812         if (rq1 == NULL && rq2 == NULL) {
813           rval = 0;
814         } else if (rq1 == NULL) {
815           rval = -1;
816         } else if (rq2 == NULL) {
817           rval = 1;
818         } else if ((rval = CompareRnaTypes (rq1->type, rq2->type)) == 0) {
819           if (rq1->field < rq2->field) {
820             rval = -1;
821           } else if (rq1->field > rq2->field) {
822             rval = 1;
823           } else {
824             rval = 0;
825           }
826         }
827         break;
828       case FieldType_struc_comment_field:
829         scf1 = (StructuredCommentFieldPtr) vnp1->data.ptrvalue;
830         scf2 = (StructuredCommentFieldPtr) vnp2->data.ptrvalue;
831         if (scf1 == NULL && scf2 == NULL) {
832           rval = 0;
833         } else if (scf1 == NULL) {
834           rval = -1;
835         } else if (scf2 == NULL) {
836           rval = 1;
837         } else if (scf1->choice < scf2->choice) {
838           rval = -1;
839         } else if (scf1->choice > scf2->choice) {
840           rval = 1;
841         } else if (scf1->choice == StructuredCommentField_named) {
842           rval = StringCmp (scf1->data.ptrvalue, scf2->data.ptrvalue);
843         }
844         break;
845       case FieldType_dblink:
846         v1 =  vnp1->data.intvalue;
847         v2 =  vnp2->data.intvalue;
848         if (v1 == v2) {
849           rval = 0;
850         } else if (v1 < v2) {
851           rval = -1;
852         } else {
853           rval = 1;
854         }
855         break;
856     }
857   }
858   return rval;
859 }
860 
861 
CompareFieldTypes(FieldTypePtr vnp1,FieldTypePtr vnp2)862 NLM_EXTERN int CompareFieldTypes (FieldTypePtr vnp1, FieldTypePtr vnp2)
863 {
864   return CompareFieldTypesEx (vnp1, vnp2, FALSE);
865 }
866 
867 
DoFieldTypesMatch(FieldTypePtr field1,FieldTypePtr field2)868 static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2)
869 {
870   if (CompareFieldTypes (field1, field2) == 0) {
871     return TRUE;
872   } else {
873     return FALSE;
874   }
875 }
876 
877 static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field);
878 
879 
FeatureTypeFromFieldType(FieldTypePtr field)880 NLM_EXTERN Int2 FeatureTypeFromFieldType (FieldTypePtr field)
881 {
882   Int2 feat_type = Macro_feature_type_any;
883   FeatureFieldPtr ffp;
884   RnaQualPtr      rq;
885 
886   if (field == NULL) {
887     feat_type = Macro_feature_type_any;
888   } else {
889     switch (field->choice) {
890       case FieldType_source_qual:
891         feat_type = Macro_feature_type_biosrc;
892         break;
893       case FieldType_feature_field:
894         ffp = (FeatureFieldPtr) field->data.ptrvalue;
895         if (ffp != NULL) {
896           feat_type = ffp->type;
897         }
898         break;
899       case FieldType_rna_field:
900         rq = (RnaQualPtr) field->data.ptrvalue;
901         if (rq != NULL) {
902           feat_type = GetFeatureTypeForRnaType (rq->type->choice);
903         }
904         break;
905       case FieldType_cds_gene_prot:
906         feat_type = FeatureTypeFromCDSGeneProtField (field->data.intvalue);
907         break;
908     }
909   }
910   return feat_type;
911 }
912 
913 
IsFeatureFieldEmpty(FeatureFieldPtr field)914 NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field)
915 {
916   if (field == NULL) return TRUE;
917   if (field->field == NULL) return TRUE;
918   return FALSE;
919 }
920 
921 
MakeFeatureFieldField(Uint2 ftype,Int4 legalqual)922 NLM_EXTERN ValNodePtr MakeFeatureFieldField (Uint2 ftype, Int4 legalqual)
923 {
924   FeatureFieldPtr ff;
925   ValNodePtr field;
926 
927   ff = FeatureFieldNew();
928   ff->type = ftype;
929   ff->field = ValNodeNew (NULL);
930   ff->field->choice = FeatQualChoice_legal_qual;
931   ff->field->data.intvalue = legalqual;
932 
933   field = ValNodeNew (NULL);
934   field->choice = FieldType_feature_field;
935   field->data.ptrvalue = ff;
936   return field;
937 }
938 
939 
IsRnaQualEmpty(RnaQualPtr rq)940 NLM_EXTERN Boolean IsRnaQualEmpty (RnaQualPtr rq)
941 {
942   if (rq == NULL) return TRUE;
943   return FALSE;
944 }
945 
946 
IsFieldTypeEmpty(FieldTypePtr field)947 NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field)
948 {
949   Boolean rval = TRUE;
950   ValNodePtr vnp;
951 
952   if (field == NULL) return TRUE;
953   switch (field->choice) {
954     case FieldType_source_qual:
955       if (field->data.ptrvalue != NULL) {
956         rval = FALSE;
957       }
958       break;
959     case FieldType_feature_field:
960       if (!IsFeatureFieldEmpty (field->data.ptrvalue)) {
961         rval = FALSE;
962       }
963       break;
964     case FieldType_cds_gene_prot:
965       rval = FALSE;
966       break;
967     case FieldType_pub:
968       rval = FALSE;
969       break;
970     case FieldType_rna_field:
971       rval = IsRnaQualEmpty (field->data.ptrvalue);
972       break;
973     case FieldType_struc_comment_field:
974       vnp = field->data.ptrvalue;
975       if (vnp == NULL
976           || (vnp->choice == StructuredCommentField_named && StringHasNoText (vnp->data.ptrvalue))
977           || (vnp->choice != StructuredCommentField_named && vnp->choice != StructuredCommentField_database)) {
978         rval = TRUE;
979       } else {
980         rval = FALSE;
981       }
982       break;
983     case FieldType_dblink:
984       if (field->data.intvalue < 1) {
985         rval = TRUE;
986       } else {
987         rval = FALSE;
988       }
989     case FieldType_misc:
990       rval = FALSE;
991       break;
992     case FieldType_molinfo_field:
993       rval = FALSE;
994       break;
995   }
996   return rval;
997 }
998 
AllowFieldMulti(FieldTypePtr field)999 NLM_EXTERN Boolean AllowFieldMulti (FieldTypePtr field)
1000 {
1001   Boolean rval = FALSE;
1002   FeatureFieldPtr feature_field;
1003 
1004   if (field == NULL) return FALSE;
1005   switch (field->choice) {
1006     case FieldType_source_qual:
1007       rval = AllowSourceQualMulti (field->data.ptrvalue);
1008       break;
1009     case FieldType_feature_field:
1010       feature_field = (FeatureFieldPtr) field->data.ptrvalue;
1011       if (feature_field != NULL && feature_field->field != NULL
1012           && feature_field->field->choice == FeatQualChoice_legal_qual
1013           && (feature_field->field->data.intvalue == Feat_qual_legal_db_xref
1014           || feature_field->field->data.intvalue == Feat_qual_legal_ec_number)) {
1015         rval = TRUE;
1016       }
1017       break;
1018     case FieldType_cds_gene_prot:
1019       if (field->data.intvalue == CDSGeneProt_field_prot_ec_number
1020           || field->data.intvalue == CDSGeneProt_field_mat_peptide_ec_number
1021           || field->data.intvalue == CDSGeneProt_field_gene_synonym) {
1022         rval = TRUE;
1023       }
1024       break;
1025     case FieldType_pub:
1026       break;
1027     case FieldType_rna_field:
1028       if (field->data.intvalue == Rna_field_gene_synonym) {
1029         rval = TRUE;
1030       }
1031       break;
1032     case FieldType_struc_comment_field:
1033       break;
1034     case FieldType_dblink:
1035       rval = TRUE;
1036       break;
1037     case FieldType_misc:
1038       if (field->data.intvalue == Misc_field_keyword) {
1039         rval = TRUE;
1040       }
1041       break;
1042   }
1043   return rval;
1044 }
1045 
1046 
IsUserObjectStructuredComment(UserObjectPtr uop)1047 NLM_EXTERN Boolean IsUserObjectStructuredComment (UserObjectPtr uop)
1048 {
1049   if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "StructuredComment") == 0) {
1050     return TRUE;
1051   } else {
1052     return FALSE;
1053   }
1054 }
1055 
1056 
IsEmptyStructuredComment(UserObjectPtr uop)1057 static Boolean IsEmptyStructuredComment (UserObjectPtr uop)
1058 {
1059   if (!IsUserObjectStructuredComment(uop)) {
1060     return FALSE;
1061   }
1062   if (uop->data == NULL) {
1063     return TRUE;
1064   } else {
1065     return FALSE;
1066   }
1067 }
1068 
1069 
IsUserObjectDBLink(UserObjectPtr uop)1070 static Boolean IsUserObjectDBLink (UserObjectPtr uop)
1071 {
1072   if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0) {
1073     return TRUE;
1074   } else {
1075     return FALSE;
1076   }
1077 }
1078 
1079 
IsEmptyDBLink(UserObjectPtr uop)1080 static Boolean IsEmptyDBLink (UserObjectPtr uop)
1081 {
1082   if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0) {
1083     if (uop->data == NULL) {
1084       return TRUE;
1085     } else {
1086       return FALSE;
1087     }
1088   } else {
1089     return FALSE;
1090   }
1091 }
1092 
1093 
IsObjectAppropriateForFieldValue(Uint1 choice,Pointer data,FieldTypePtr field)1094 static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field)
1095 {
1096   SeqFeatPtr        sfp;
1097   SeqDescrPtr       sdp;
1098   FeatureFieldPtr   fp;
1099   RnaQualPtr        rq;
1100   Boolean rval = FALSE;
1101 
1102   if (data == NULL || field == NULL) return FALSE;
1103 
1104   switch (field->choice) {
1105     case FieldType_source_qual :
1106       if (choice == OBJ_SEQFEAT) {
1107         sfp = (SeqFeatPtr) data;
1108         if (sfp->data.choice == SEQFEAT_BIOSRC) {
1109           rval = TRUE;
1110         }
1111       } else if (choice == OBJ_SEQDESC) {
1112         sdp = (SeqDescrPtr) data;
1113         if (sdp->choice == Seq_descr_source) {
1114           rval = TRUE;
1115         }
1116       }
1117       break;
1118     case FieldType_feature_field :
1119       if (choice == OBJ_SEQFEAT) {
1120         sfp = (SeqFeatPtr) data;
1121         fp = (FeatureFieldPtr) field->data.ptrvalue;
1122         if (fp != NULL && (fp->type == Macro_feature_type_any || GetFeatdefFromFeatureType (fp->type) == sfp->idx.subtype)) {
1123           rval = TRUE;
1124         }
1125       }
1126       break;
1127     case FieldType_rna_field :
1128       if (choice == OBJ_SEQFEAT) {
1129         sfp = (SeqFeatPtr) data;
1130         rq = (RnaQualPtr) field->data.ptrvalue;
1131         if (rq != NULL && DoesFeatureMatchRnaType (sfp, rq->type)) {
1132           rval = TRUE;
1133         }
1134       }
1135       break;
1136     case FieldType_cds_gene_prot :
1137       if (choice == 0) {
1138         rval = TRUE;
1139       }
1140       break;
1141     case FieldType_molinfo_field :
1142       if (choice == OBJ_BIOSEQ) {
1143         rval = TRUE;
1144       }
1145       break;
1146     case FieldType_pub:
1147       if (choice == OBJ_SEQFEAT) {
1148         sfp = (SeqFeatPtr) data;
1149         if (sfp->data.choice == SEQFEAT_PUB) {
1150           rval = TRUE;
1151         }
1152       } else if (choice == OBJ_SEQDESC) {
1153         sdp = (SeqDescrPtr) data;
1154         if (sdp->choice == Seq_descr_pub) {
1155           rval = TRUE;
1156         }
1157       }
1158       break;
1159     case FieldType_struc_comment_field:
1160       if (choice == OBJ_SEQDESC) {
1161         sdp = (SeqDescrPtr) data;
1162         if (sdp->choice == Seq_descr_user && IsUserObjectStructuredComment (sdp->data.ptrvalue)) {
1163           rval = TRUE;
1164         }
1165       }
1166       break;
1167     case FieldType_dblink:
1168       if (choice == OBJ_SEQDESC) {
1169         sdp = (SeqDescrPtr) data;
1170         if (sdp->choice == Seq_descr_user && IsUserObjectDBLink (sdp->data.ptrvalue)) {
1171           rval = TRUE;
1172         }
1173       }
1174       break;
1175     case FieldType_misc:
1176       if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
1177         rval = TRUE;
1178       } else if (choice == OBJ_SEQDESC
1179                  && field->data.intvalue == Misc_field_comment_descriptor
1180                  && (sdp = (SeqDescrPtr) data) != NULL
1181                  && sdp->choice == Seq_descr_comment) {
1182         rval = TRUE;
1183       } else if (choice == OBJ_SEQDESC
1184                  && field->data.intvalue == Misc_field_defline
1185                  && (sdp = (SeqDescrPtr) data) != NULL
1186                  && sdp->choice == Seq_descr_title) {
1187         rval = TRUE;
1188       } else if (choice == OBJ_SEQDESC
1189                  && field->data.intvalue == Misc_field_keyword
1190                  && (sdp = (SeqDescrPtr) data) != NULL
1191                  && sdp->choice == Seq_descr_genbank) {
1192         rval = TRUE;
1193       }
1194       break;
1195   }
1196   return rval;
1197 }
1198 
1199 
IsObjectAppropriateForFieldPair(Uint1 choice,Pointer data,FieldPairTypePtr fieldpair)1200 static Boolean IsObjectAppropriateForFieldPair (Uint1 choice, Pointer data, FieldPairTypePtr fieldpair)
1201 {
1202   FieldTypePtr f;
1203   Boolean rval;
1204 
1205   f = GetFromFieldFromFieldPair(fieldpair);
1206   rval = IsObjectAppropriateForFieldValue(choice, data, f);
1207   f = FieldTypeFree (f);
1208   return rval;
1209 }
1210 
1211 
1212 /* structure and create/free functions for CGPSet, used for handling CDS-Gene-Prot sets */
/* One CDS-Gene-Prot set: four ValNode lists, one per member kind.
 * CGPSetFree releases each list with ValNodeFree.
 * NOTE(review): the list payloads are presumably SeqFeatPtr values owned
 * elsewhere - confirm against the BuildCGPSetFrom* builders.
 */
typedef struct cgpset
{
  ValNodePtr cds_list;   /* coding-region members of the set */
  ValNodePtr gene_list;  /* gene members of the set */
  ValNodePtr prot_list;  /* protein members of the set */
  ValNodePtr mrna_list;  /* mRNA members of the set */
} CGPSetData, PNTR CGPSetPtr;
1220 
1221 
1222 
CGPSetNew(void)1223 static CGPSetPtr CGPSetNew (void)
1224 {
1225   CGPSetPtr c;
1226 
1227   c = (CGPSetPtr) MemNew (sizeof (CGPSetData));
1228   c->cds_list = NULL;
1229   c->gene_list = NULL;
1230   c->prot_list = NULL;
1231   c->mrna_list = NULL;
1232   return c;
1233 }
1234 
1235 
/* Releases a CGPSet: each member list is passed to ValNodeFree and the
 * structure itself to MemFree.  A NULL argument is returned as-is;
 * otherwise the value returned by MemFree is handed back.
 */
static CGPSetPtr CGPSetFree (CGPSetPtr c)
{
  if (c == NULL) {
    return c;
  }

  c->cds_list = ValNodeFree (c->cds_list);
  c->gene_list = ValNodeFree (c->gene_list);
  c->prot_list = ValNodeFree (c->prot_list);
  c->mrna_list = ValNodeFree (c->mrna_list);
  return MemFree (c);
}
1247 
1248 
/* Frees a list whose nodes each hold a CGPSet: every node is detached,
 * its set released with CGPSetFree, and the node freed.  Always returns
 * NULL.
 */
static ValNodePtr FreeCGPSetList (ValNodePtr vnp)
{
  ValNodePtr following;

  for (; vnp != NULL; vnp = following) {
    following = vnp->next;
    /* detach first so ValNodeFree sees a single node, not the remainder */
    vnp->next = NULL;
    vnp->data.ptrvalue = CGPSetFree (vnp->data.ptrvalue);
    ValNodeFree (vnp);
  }
  return NULL;
}
1262 
1263 static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed);
1264 static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene);
1265 static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna);
1266 
1267 
1268 /* generic functions for mapping fields */
1269 
/* One row of the feature-type mapping table (feattype_featdef). */
typedef struct feattypefeatdef {
  Int4 feattype;     /* Macro_feature_type_* value */
  Int4 featdef;      /* corresponding FEATDEF_* value */
  CharPtr featname;  /* name returned by GetFeatureNameFromFeatureType and
                        matched by GetFeatureTypeByName */
} FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr;
1275 
/* Mapping between macro feature types, FEATDEF values and display names.
 *
 * The lookup functions in this file scan the table front to back and stop
 * at the first hit, so row order matters wherever a value occurs twice:
 *   - FEATDEF_otherRNA and the name "misc_RNA" appear under both
 *     Macro_feature_type_otherRNA and Macro_feature_type_misc_RNA;
 *   - FEATDEF_preRNA appears under both Macro_feature_type_preRNA and
 *     Macro_feature_type_precursor_RNA.
 * Reverse lookups (by featdef or by name) therefore return the earlier row.
 *
 * Row 0 is the "any" entry; AddImportFeaturesToChoiceList and
 * AddAllFeaturesToChoiceList start at index 1 and so never list it.
 */
static FeatTypeFeatDefData feattype_featdef[] = {
 { Macro_feature_type_any , FEATDEF_ANY , "any" } ,
 { Macro_feature_type_gene , FEATDEF_GENE , "gene" } ,
 { Macro_feature_type_org , FEATDEF_ORG , "org" } ,
 { Macro_feature_type_cds , FEATDEF_CDS , "CDS" } ,
 { Macro_feature_type_prot , FEATDEF_PROT , "Protein" } ,
 { Macro_feature_type_preRNA , FEATDEF_preRNA , "preRNA" } ,
 { Macro_feature_type_mRNA , FEATDEF_mRNA , "mRNA" } ,
 { Macro_feature_type_tRNA , FEATDEF_tRNA , "tRNA" } ,
 { Macro_feature_type_rRNA , FEATDEF_rRNA , "rRNA" } ,
 { Macro_feature_type_snRNA , FEATDEF_snRNA , "snRNA" } ,
 { Macro_feature_type_scRNA , FEATDEF_scRNA , "scRNA" } ,
 { Macro_feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } ,
 { Macro_feature_type_pub , FEATDEF_PUB , "pub" } ,
 { Macro_feature_type_seq , FEATDEF_SEQ , "seq" } ,
 { Macro_feature_type_imp , FEATDEF_IMP , "imp" } ,
 { Macro_feature_type_allele , FEATDEF_allele , "allele" } ,
 { Macro_feature_type_attenuator , FEATDEF_attenuator , "attenuator" } ,
 { Macro_feature_type_c_region , FEATDEF_C_region , "c_region" } ,
 { Macro_feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } ,
 { Macro_feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } ,
 { Macro_feature_type_d_loop , FEATDEF_D_loop , "d_loop" } ,
 { Macro_feature_type_d_segment , FEATDEF_D_segment , "d_segment" } ,
 { Macro_feature_type_enhancer , FEATDEF_enhancer , "enhancer" } ,
 { Macro_feature_type_exon , FEATDEF_exon , "exon" } ,
 { Macro_feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } ,
 { Macro_feature_type_iDNA , FEATDEF_iDNA , "iDNA" } ,
 { Macro_feature_type_intron , FEATDEF_intron , "intron" } ,
 { Macro_feature_type_j_segment , FEATDEF_J_segment , "j_segment" } ,
 { Macro_feature_type_ltr , FEATDEF_LTR , "LTR" } ,
 { Macro_feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } ,
 { Macro_feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } ,
 { Macro_feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } ,
 { Macro_feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } ,
 { Macro_feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } ,
 { Macro_feature_type_misc_RNA , FEATDEF_otherRNA , "misc_RNA" } ,
 { Macro_feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } ,
 { Macro_feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } ,
 { Macro_feature_type_modified_base , FEATDEF_modified_base , "modified_base" } ,
 { Macro_feature_type_mutation , FEATDEF_mutation , "mutation" } ,
 { Macro_feature_type_n_region , FEATDEF_N_region , "n_region" } ,
 { Macro_feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } ,
 { Macro_feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } ,
 { Macro_feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } ,
 { Macro_feature_type_precursor_RNA , FEATDEF_preRNA , "precursor_RNA" } ,
 { Macro_feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } ,
 { Macro_feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } ,
 { Macro_feature_type_promoter , FEATDEF_promoter , "promoter" } ,
 { Macro_feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } ,
 { Macro_feature_type_rbs , FEATDEF_RBS , "rbs" } ,
 { Macro_feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } ,
 { Macro_feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } ,
 { Macro_feature_type_s_region , FEATDEF_S_region , "s_region" } ,
 { Macro_feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } ,
 { Macro_feature_type_source , FEATDEF_source , "source" } ,
 { Macro_feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } ,
 { Macro_feature_type_sts , FEATDEF_STS , "sts" } ,
 { Macro_feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } ,
 { Macro_feature_type_terminator , FEATDEF_terminator , "terminator" } ,
 { Macro_feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } ,
 { Macro_feature_type_unsure , FEATDEF_unsure , "unsure" } ,
 { Macro_feature_type_v_region , FEATDEF_V_region , "v_region" } ,
 { Macro_feature_type_v_segment , FEATDEF_V_segment , "v_segment" } ,
 { Macro_feature_type_variation , FEATDEF_variation , "variation" } ,
 { Macro_feature_type_virion , FEATDEF_virion , "virion" } ,
 { Macro_feature_type_n3clip , FEATDEF_3clip , "3'clip" } ,
 { Macro_feature_type_n3UTR , FEATDEF_3UTR , "3'UTR" } ,
 { Macro_feature_type_n5clip , FEATDEF_5clip , "5'clip" } ,
 { Macro_feature_type_n5UTR , FEATDEF_5UTR , "5'UTR" } ,
 { Macro_feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } ,
 { Macro_feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } ,
 { Macro_feature_type_site_ref , FEATDEF_site_ref , "site_ref" } ,
 { Macro_feature_type_region , FEATDEF_REGION , "region" } ,
 { Macro_feature_type_comment , FEATDEF_COMMENT , "comment" } ,
 { Macro_feature_type_bond , FEATDEF_BOND , "bond" } ,
 { Macro_feature_type_site , FEATDEF_SITE , "site" } ,
 { Macro_feature_type_rsite , FEATDEF_RSITE , "rsite" } ,
 { Macro_feature_type_user , FEATDEF_USER , "user" } ,
 { Macro_feature_type_txinit , FEATDEF_TXINIT , "txinit" } ,
 { Macro_feature_type_num , FEATDEF_NUM , "num" } ,
 { Macro_feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } ,
 { Macro_feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } ,
 { Macro_feature_type_het , FEATDEF_HET , "het" } ,
 { Macro_feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } ,
 { Macro_feature_type_preprotein , FEATDEF_preprotein , "preprotein" } ,
 { Macro_feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } ,
 { Macro_feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } ,
 { Macro_feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } ,
 { Macro_feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } ,
 { Macro_feature_type_gap , FEATDEF_gap , "gap" } ,
 { Macro_feature_type_operon , FEATDEF_operon , "operon" } ,
 { Macro_feature_type_oriT , FEATDEF_oriT , "oriT" } ,
 { Macro_feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } ,
 { Macro_feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" } ,
 { Macro_feature_type_mobile_element, FEATDEF_mobile_element, "mobile_element" } ,
 { Macro_feature_type_regulatory, FEATDEF_regulatory, "regulatory" }
};
1373 
/* Number of rows in feattype_featdef.  The expansion is parenthesized so
 * the quotient stays one operand wherever the macro is used (for example
 * "x / NUM_feattype_featdef" or "x % NUM_feattype_featdef").
 */
#define NUM_feattype_featdef (sizeof (feattype_featdef) / sizeof (feattype_featdef[0]))
1375 
GetFeatdefFromFeatureType(Int4 feature_type)1376 NLM_EXTERN Int4 GetFeatdefFromFeatureType (Int4 feature_type)
1377 {
1378   Int4 i;
1379 
1380   for (i = 0; i < NUM_feattype_featdef; i++) {
1381     if (feature_type == feattype_featdef[i].feattype) {
1382       return feattype_featdef[i].featdef;
1383     }
1384   }
1385   return FEATDEF_BAD;
1386 }
1387 
1388 
GetFeatureTypeFromFeatdef(Int4 featdef)1389 NLM_EXTERN Int4 GetFeatureTypeFromFeatdef (Int4 featdef)
1390 {
1391   Int4 i;
1392 
1393   for (i = 0; i < NUM_feattype_featdef; i++) {
1394     if (featdef == feattype_featdef[i].featdef) {
1395       return feattype_featdef[i].feattype;
1396     }
1397   }
1398   return FEATDEF_BAD;
1399 }
1400 
1401 
GetFeatureNameFromFeatureType(Int4 feature_type)1402 NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type)
1403 {
1404   CharPtr str = NULL;
1405   Int4 i;
1406 
1407   for (i = 0; i < NUM_feattype_featdef && str == NULL; i++) {
1408     if (feature_type == feattype_featdef[i].feattype) {
1409       str = feattype_featdef[i].featname;
1410     }
1411   }
1412   if (str == NULL) {
1413     str = "Unknown feature type";
1414   }
1415   return str;
1416 }
1417 
1418 
Matchnamestring(CharPtr name1,CharPtr name2)1419 static Boolean Matchnamestring (CharPtr name1, CharPtr name2)
1420 {
1421   if (name1 == NULL && name2 == NULL) {
1422     return TRUE;
1423   } else if (name1 == NULL || name2 == NULL) {
1424     return FALSE;
1425   } else {
1426     while (*name1 != 0 && *name2 != 0) {
1427       while (*name1 == ' ' || *name1 == '-' || *name1 == '_') {
1428         name1++;
1429       }
1430       while (*name2 == ' ' || *name2 == '-' || *name2 == '_') {
1431         name2++;
1432       }
1433       if (tolower (*name1) != tolower(*name2)) {
1434         return FALSE;
1435       }
1436       name1++;
1437       name2++;
1438     }
1439     if (*name1 == 0 && *name2 == 0) {
1440       return TRUE;
1441     } else {
1442       return FALSE;
1443     }
1444   }
1445 }
1446 
1447 
/* One alias-to-canonical-name pair.  GetCanonical walks an array of these
 * until it reaches an entry whose alias is NULL.
 */
typedef struct stringalias {
  CharPtr alias;      /* alternate spelling compared against the input */
  CharPtr canonical;  /* name returned when the alias matches */
} StringAliasData, PNTR StringAliasPtr;
1452 
1453 
/* Translates str through an alias table.  The table is scanned up to the
 * entry whose alias is NULL; the canonical name of the first alias that
 * Matchnamestring accepts is returned.  With no table or no match, str
 * itself is returned.
 */
static CharPtr GetCanonical (CharPtr str, StringAliasPtr alias_list)
{
  StringAliasPtr entry;

  if (alias_list != NULL) {
    for (entry = alias_list; entry->alias != NULL; entry++) {
      if (Matchnamestring (str, entry->alias)) {
        return entry->canonical;
      }
    }
  }
  return str;
}
1468 
1469 
GetFeatureTypeByName(CharPtr feat_name)1470 NLM_EXTERN Int4 GetFeatureTypeByName (CharPtr feat_name)
1471 {
1472   Int4 i;
1473 
1474   for (i = 0; i < NUM_feattype_featdef; i++) {
1475     if (Matchnamestring (feattype_featdef[i].featname, feat_name)) {
1476       return feattype_featdef[i].feattype;
1477     }
1478   }
1479   return -1;
1480 }
1481 
1482 
AddImportFeaturesToChoiceList(ValNodePtr PNTR feature_type_list)1483 NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
1484 {
1485   Int4 i, seqfeattype;
1486   CharPtr featname;
1487   ValNodePtr tmp_list = NULL;
1488 
1489   for (i = 1; i < NUM_feattype_featdef; i++) {
1490     if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
1491     if (feattype_featdef[i].feattype == Macro_feature_type_conflict) continue;
1492     if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue;
1493     seqfeattype = FindFeatFromFeatDefType (feattype_featdef[i].featdef);
1494     if (seqfeattype == SEQFEAT_IMP) {
1495       featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
1496       if (featname != NULL) {
1497         ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname));
1498       }
1499     }
1500   }
1501   tmp_list = ValNodeSort (tmp_list, SortVnpByString);
1502   ValNodeLink (feature_type_list, tmp_list);
1503 }
1504 
1505 
1506 
IsMostUsedFeature(Uint1 val)1507 static Boolean IsMostUsedFeature (Uint1 val)
1508 {
1509   if (val == Macro_feature_type_gene
1510       || val == Macro_feature_type_cds
1511       || val == Macro_feature_type_prot
1512       || val == Macro_feature_type_exon
1513       || val == Macro_feature_type_intron
1514       || val == Macro_feature_type_mRNA
1515       || val == Macro_feature_type_rRNA
1516       || val == Macro_feature_type_otherRNA
1517       || val == Macro_feature_type_misc_feature) {
1518     return TRUE;
1519   } else {
1520     return FALSE;
1521   }
1522 }
1523 
1524 
SortVnpByFeatureName(VoidPtr ptr1,VoidPtr ptr2)1525 static int LIBCALLBACK SortVnpByFeatureName (VoidPtr ptr1, VoidPtr ptr2)
1526 
1527 {
1528   CharPtr     str1;
1529   CharPtr     str2;
1530   ValNodePtr  vnp1;
1531   ValNodePtr  vnp2;
1532   Boolean     most_used1, most_used2;
1533 
1534   if (ptr1 != NULL && ptr2 != NULL) {
1535     vnp1 = *((ValNodePtr PNTR) ptr1);
1536     vnp2 = *((ValNodePtr PNTR) ptr2);
1537     if (vnp1 != NULL && vnp2 != NULL) {
1538       most_used1 = IsMostUsedFeature (vnp1->choice);
1539       most_used2 = IsMostUsedFeature (vnp2->choice);
1540       if (most_used1 && !most_used2) {
1541         return -1;
1542       } else if (!most_used1 && most_used2) {
1543         return 1;
1544       } else {
1545         str1 = (CharPtr) vnp1->data.ptrvalue;
1546         str2 = (CharPtr) vnp2->data.ptrvalue;
1547         if (str1 != NULL && str2 != NULL) {
1548           return StringICmp (str1, str2);
1549         }
1550       }
1551     }
1552   }
1553   return 0;
1554 }
1555 
1556 
AddAllFeaturesToChoiceList(ValNodePtr PNTR feature_type_list)1557 NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
1558 {
1559   Int4 i;
1560   CharPtr featname;
1561   ValNodePtr tmp_list = NULL;
1562 
1563   for (i = 1; i < NUM_feattype_featdef; i++) {
1564     if (feattype_featdef[i].feattype == Macro_feature_type_gap) continue;
1565     if (IsRegulatorySubtype(feattype_featdef[i].featdef)) continue;
1566     featname = GetFeatureNameFromFeatureType (feattype_featdef[i].feattype);
1567     if (featname != NULL) {
1568       ValNodeAddPointer (&tmp_list, feattype_featdef[i].feattype, StringSave (featname));
1569     }
1570   }
1571   tmp_list = ValNodeSort (tmp_list, SortVnpByFeatureName);
1572   ValNodeLink (feature_type_list, tmp_list);
1573 }
1574 
1575 
/* One row of the feature-qualifier mapping table (featqual_gbqual). */
typedef struct featqualgbqual {
  Int4 featqual;     /* Feat_qual_legal_* value */
  Int4 gbqual;       /* GBQUAL_* value; the table uses -1 where no GBQUAL
                        constant is given */
  Int4 subfield;     /* 0 in most rows; rows sharing one gbqual use 1, 2, ...
                        NOTE(review): presumably selects a part of the
                        qualifier value - confirm against the users */
  CharPtr qualname;  /* name returned by GetFeatQualName and matched by
                        GetFeatQualByName */
} FeatQualGBQualData, PNTR FeatQualGBQualPtr;
1582 
/* Mapping between legal feature qualifiers, GBQUAL values, subfield
 * numbers and display names.
 *
 * Lookups in this file scan front to back and return the first hit.
 * "location" and "name" carry -1 in the gbqual column.  The
 * mobile-element-type and satellite qualifiers each appear three times
 * with the same gbqual: subfield 0 for the plain qualifier and subfields
 * 1 and 2 for the "-type" and "-name" variants.
 */
static FeatQualGBQualData featqual_gbqual[] = {
 { Feat_qual_legal_allele , GBQUAL_allele , 0,  "allele" } ,
 { Feat_qual_legal_anticodon , GBQUAL_anticodon , 0,  "anticodon" } ,
 { Feat_qual_legal_bound_moiety , GBQUAL_bound_moiety , 0,  "bound-moiety" } ,
 { Feat_qual_legal_chromosome , GBQUAL_chromosome , 0, "chromosome" } ,
 { Feat_qual_legal_citation , GBQUAL_citation , 0, "citation" } ,
 { Feat_qual_legal_codon , GBQUAL_codon , 0, "codon" } ,
 { Feat_qual_legal_codon_start , GBQUAL_codon_start , 0, "codon-start" } ,
 { Feat_qual_legal_compare , GBQUAL_compare , 0, "compare" } ,
 { Feat_qual_legal_cons_splice , GBQUAL_cons_splice , 0, "cons-splice" } ,
 { Feat_qual_legal_db_xref , GBQUAL_db_xref , 0, "db-xref" } ,
 { Feat_qual_legal_direction , GBQUAL_direction , 0, "direction" } ,
 { Feat_qual_legal_ec_number , GBQUAL_EC_number , 0, "EC number" } ,
 { Feat_qual_legal_environmental_sample , GBQUAL_environmental_sample , 0, "environmental-sample" } ,
 { Feat_qual_legal_evidence , GBQUAL_evidence , 0, "evidence" } ,
 { Feat_qual_legal_exception , GBQUAL_exception , 0, "exception" } ,
 { Feat_qual_legal_experiment , GBQUAL_experiment , 0, "experiment" } ,
 { Feat_qual_legal_focus , GBQUAL_focus , 0, "focus" } ,
 { Feat_qual_legal_frequency , GBQUAL_frequency , 0, "frequency" } ,
 { Feat_qual_legal_function , GBQUAL_function , 0, "function" } ,
 { Feat_qual_legal_gene , GBQUAL_gene , 0, "locus" } ,
 { Feat_qual_legal_inference , GBQUAL_inference , 0, "inference" } ,
 { Feat_qual_legal_location , -1 , 0, "location" } ,
 { Feat_qual_legal_locus_tag , GBQUAL_locus_tag , 0, "locus-tag" } ,
 { Feat_qual_legal_map , GBQUAL_map , 0, "map" } ,
 { Feat_qual_legal_mobile_element_type , GBQUAL_mobile_element_type , 0, "mobile-element-type" } ,
 { Feat_qual_legal_mobile_element_type_type , GBQUAL_mobile_element_type , 1, "mobile-element-type-type"} ,
 { Feat_qual_legal_mobile_element_name , GBQUAL_mobile_element_type , 2, "mobile-element-name"} ,
 { Feat_qual_legal_mod_base , GBQUAL_mod_base , 0, "mod-base" } ,
 { Feat_qual_legal_mol_type , GBQUAL_mol_type , 0, "mol-type" } ,
 { Feat_qual_legal_name, -1 , 0 , "name" } ,
 { Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , 0, "ncRNA-class" } ,
 { Feat_qual_legal_note , GBQUAL_note , 0, "note" } ,
 { Feat_qual_legal_number , GBQUAL_number , 0, "number" } ,
 { Feat_qual_legal_old_locus_tag , GBQUAL_old_locus_tag , 0, "old-locus-tag" } ,
 { Feat_qual_legal_operon , GBQUAL_operon , 0, "operon" } ,
 { Feat_qual_legal_organism , GBQUAL_organism , 0, "organism" } ,
 { Feat_qual_legal_organelle , GBQUAL_organelle , 0, "organelle" } ,
 { Feat_qual_legal_partial , GBQUAL_partial , 0, "partial" } ,
 { Feat_qual_legal_pcr_conditions, GBQUAL_PCR_conditions , 0, "pcr-conditions" } ,
 { Feat_qual_legal_phenotype , GBQUAL_phenotype , 0, "phenotype" } ,
 { Feat_qual_legal_plasmid , GBQUAL_plasmid , 0, "plasmid" } ,
 { Feat_qual_legal_product , GBQUAL_product , 0, "product" } ,
 { Feat_qual_legal_protein_id , GBQUAL_protein_id , 0, "protein-id" } ,
 { Feat_qual_legal_pseudo , GBQUAL_pseudogene , 0, "pseudogene" } ,
 { Feat_qual_legal_rearranged , GBQUAL_rearranged , 0, "rearranged" } ,
 { Feat_qual_legal_regulatory_class , GBQUAL_regulatory_class , 0, "regulatory-class" } ,
 { Feat_qual_legal_replace , GBQUAL_replace , 0, "replace" } ,
 { Feat_qual_legal_rpt_family , GBQUAL_rpt_family , 0, "rpt-family" } ,
 { Feat_qual_legal_rpt_type , GBQUAL_rpt_type , 0, "rpt-type" } ,
 { Feat_qual_legal_rpt_unit , GBQUAL_rpt_unit , 0, "rpt-unit" } ,
 { Feat_qual_legal_rpt_unit_seq , GBQUAL_rpt_unit_seq , 0, "rpt-unit-seq" } ,
 { Feat_qual_legal_rpt_unit_range , GBQUAL_rpt_unit_range , 0, "rpt-unit-range" } ,
 { Feat_qual_legal_satellite , GBQUAL_satellite , 0, "satellite" } ,
 { Feat_qual_legal_satellite_type , GBQUAL_satellite, 1, "satellite-type"} ,
 { Feat_qual_legal_satellite_name , GBQUAL_satellite, 2, "satellite-name"} ,
 { Feat_qual_legal_segment , GBQUAL_segment , 0, "segment" } ,
 { Feat_qual_legal_sequenced_mol , GBQUAL_sequenced_mol , 0, "sequenced-mol" } ,
 { Feat_qual_legal_standard_name , GBQUAL_standard_name , 0, "standard-name" } ,
 { Feat_qual_legal_tag_peptide , GBQUAL_tag_peptide , 0, "tag-peptide" } ,
 { Feat_qual_legal_transcript_id , GBQUAL_transcript_id , 0, "transcript-id" } ,
 { Feat_qual_legal_transgenic , GBQUAL_transgenic , 0, "transgenic" } ,
 { Feat_qual_legal_translation , GBQUAL_translation , 0, "translation" } ,
 { Feat_qual_legal_transl_except , GBQUAL_transl_except , 0, "transl-except" } ,
 { Feat_qual_legal_transl_table , GBQUAL_transl_table , 0, "transl-table" } ,
 { Feat_qual_legal_usedin , GBQUAL_usedin , 0, "usedin" }
};
1650 
/* Number of rows in featqual_gbqual.  The expansion is parenthesized so
 * the quotient stays one operand wherever the macro is used (for example
 * "x / NUM_featqual_gbqual" or "x % NUM_featqual_gbqual").
 */
#define NUM_featqual_gbqual (sizeof (featqual_gbqual) / sizeof (featqual_gbqual[0]))
1652 
1653 
GetNumFeatQual(void)1654 NLM_EXTERN Int4 GetNumFeatQual (void)
1655 {
1656   return NUM_featqual_gbqual;
1657 }
1658 
1659 
/* Looks up the gbqual value of a feature qualifier.  On a hit the row's
 * subfield number is also stored through subfield when that pointer is
 * non-NULL.  Returns -1 (leaving *subfield untouched) when no row has the
 * given featqual.
 */
static Int4 GetGBQualFromFeatQual (Int4 featqual, Int4Ptr subfield)
{
  Int4 row;

  for (row = 0; row < NUM_featqual_gbqual; row++) {
    if (featqual_gbqual[row].featqual != featqual) {
      continue;
    }
    if (subfield != NULL) {
      *subfield = featqual_gbqual[row].subfield;
    }
    return featqual_gbqual[row].gbqual;
  }
  return -1;
}
1674 
1675 
/* Reverse lookup: returns the featqual of the first row whose gbqual and
 * subfield both equal the arguments, or -1 when there is none.
 */
static Int4 GetFeatQualByGBQualAndSubfield (Int4 gbqual, Int4 subfield)
{
  Int4 row = 0;

  while (row < NUM_featqual_gbqual) {
    if (featqual_gbqual[row].subfield == subfield
        && featqual_gbqual[row].gbqual == gbqual) {
      return featqual_gbqual[row].featqual;
    }
    row++;
  }
  return -1;
}
1687 
1688 
GetFeatQualName(Int4 featqual)1689 NLM_EXTERN CharPtr GetFeatQualName (Int4 featqual)
1690 {
1691   Int4 i;
1692 
1693   for (i = 0; i < NUM_featqual_gbqual; i++) {
1694     if (featqual == featqual_gbqual[i].featqual) {
1695       return featqual_gbqual[i].qualname;
1696     }
1697   }
1698   return NULL;
1699 }
1700 
1701 
GetFeatQualByName(CharPtr qualname)1702 NLM_EXTERN Int4 GetFeatQualByName (CharPtr qualname)
1703 {
1704   Int4 i;
1705 
1706   for (i = 0; i < NUM_featqual_gbqual; i++) {
1707     if (Matchnamestring (featqual_gbqual[i].qualname, qualname)) {
1708       return featqual_gbqual[i].featqual;
1709     }
1710   }
1711   return -1;
1712 }
1713 
1714 
/* Returns the largest subfield value found among the featqual_gbqual rows
 * that carry the given gbqual; 0 when there is no such row or no positive
 * subfield. */
static Int4 NumGbQualSubfields (Int4 gbqual)
{
  Int4 highest = 0;
  Int4 row;

  for (row = 0; row < NUM_featqual_gbqual; row++) {
    if (featqual_gbqual[row].gbqual != gbqual) {
      continue;
    }
    if (highest < featqual_gbqual[row].subfield) {
      highest = featqual_gbqual[row].subfield;
    }
  }
  return highest;
}
1727 
1728 
AddAllFeatureFieldsToChoiceList(ValNodePtr PNTR field_list)1729 NLM_EXTERN void AddAllFeatureFieldsToChoiceList (ValNodePtr PNTR field_list)
1730 {
1731   Int4 i;
1732 
1733   for (i = 0; i < NUM_featqual_gbqual; i++) {
1734     ValNodeAddPointer (field_list, featqual_gbqual[i].featqual, StringSave (featqual_gbqual[i].qualname));
1735   }
1736 }
1737 
1738 
SummarizeFeatQual(ValNodePtr qual)1739 NLM_EXTERN CharPtr SummarizeFeatQual (ValNodePtr qual)
1740 {
1741   if (qual == NULL) {
1742     return StringSave ("unspecified qualifier");
1743   } else if (qual->choice == FeatQualChoice_legal_qual) {
1744     return StringSave (GetFeatQualName (qual->data.intvalue));
1745   } else if (qual->choice == FeatQualChoice_illegal_qual) {
1746     return StringSave (qual->data.ptrvalue);
1747   } else {
1748     return StringSave ("unspecified qualifier");
1749   }
1750 }
1751 
1752 
1753 /* functions for RnaQual values */
1754 
1755 /* functions for RnaType values */
1756 typedef struct rnatypemap {
1757   Int4 rnatype;
1758   Int4 rnaval;
1759   Int4 featuretype;
1760   CharPtr rnaname;
1761 } RnaTypeMapData, PNTR RnaTypeMapPtr;
1762 
1763 static RnaTypeMapData rnatypemap[] = {
1764  { RnaFeatType_preRNA , RNA_TYPE_premsg, Macro_feature_type_preRNA, "preRNA" } ,
1765  { RnaFeatType_mRNA , RNA_TYPE_mRNA, Macro_feature_type_mRNA, "mRNA" } ,
1766  { RnaFeatType_tRNA , RNA_TYPE_tRNA, Macro_feature_type_tRNA, "tRNA" } ,
1767  { RnaFeatType_rRNA , RNA_TYPE_rRNA, Macro_feature_type_rRNA, "rRNA" } ,
1768  { RnaFeatType_ncRNA , RNA_TYPE_ncRNA , Macro_feature_type_ncRNA, "ncRNA" } ,
1769  { RnaFeatType_tmRNA , RNA_TYPE_tmRNA , Macro_feature_type_tmRNA, "tmRNA" } ,
1770  { RnaFeatType_miscRNA , RNA_TYPE_misc_RNA , Macro_feature_type_misc_RNA, "misc_RNA" }
1771 };
1772 
/* Number of rows in rnatypemap.  Parenthesized so the expansion is a single
 * operand wherever it is used. */
#define NUM_rnatypemap (sizeof (rnatypemap) / sizeof (RnaTypeMapData))
1774 
1775 
GetNameForRnaType(Int4 rnatype)1776 static CharPtr GetNameForRnaType (Int4 rnatype)
1777 {
1778   Int4 i;
1779 
1780   for (i = 0; i < NUM_rnatypemap; i++) {
1781     if (rnatypemap[i].rnatype == rnatype) {
1782       return rnatypemap[i].rnaname;
1783     }
1784   }
1785   return NULL;
1786 }
1787 
1788 
GetRnaTypeForName(CharPtr rnaname)1789 static Int4 GetRnaTypeForName (CharPtr rnaname)
1790 {
1791   Int4 i;
1792 
1793   for (i = 0; i < NUM_rnatypemap; i++) {
1794     if (StringCmp (rnatypemap[i].rnaname, rnaname) == 0) {
1795       return rnatypemap[i].rnatype;
1796     }
1797   }
1798   return -1;
1799 }
1800 
1801 
/* Translates an rnatype into the rnaval stored in the same rnatypemap row;
 * returns -1 when the rnatype is not in the table. */
static Int4 GetRnaValForRnaType (Int4 rnatype)
{
  RnaTypeMapPtr entry;

  for (entry = rnatypemap; entry < rnatypemap + NUM_rnatypemap; entry++) {
    if (entry->rnatype != rnatype) {
      continue;
    }
    return entry->rnaval;
  }
  return -1;
}
1813 
1814 
GetFeatureTypeForRnaType(Int4 rnatype)1815 NLM_EXTERN Int4 GetFeatureTypeForRnaType (Int4 rnatype)
1816 {
1817   Int4 i;
1818 
1819   for (i = 0; i < NUM_rnatypemap; i++) {
1820     if (rnatypemap[i].rnatype == rnatype) {
1821       return rnatypemap[i].featuretype;
1822     }
1823   }
1824   return -1;
1825 }
1826 
1827 
GetRNATypeList(void)1828 NLM_EXTERN ValNodePtr GetRNATypeList (void)
1829 {
1830   Int4 i;
1831   ValNodePtr list = NULL;
1832 
1833   for (i = 0; i < NUM_rnatypemap; i++) {
1834     ValNodeAddPointer (&list, rnatypemap[i].rnatype, StringSave (rnatypemap[i].rnaname));
1835   }
1836   return list;
1837 }
1838 
1839 
DoesFeatureMatchRnaType(SeqFeatPtr sfp,RnaFeatTypePtr rt)1840 static Boolean DoesFeatureMatchRnaType (SeqFeatPtr sfp, RnaFeatTypePtr rt)
1841 {
1842   Boolean rval = FALSE;
1843   RnaRefPtr rrp;
1844   RNAGenPtr rgp;
1845   Int4 rnaval;
1846 
1847   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) {
1848     return FALSE;
1849   }
1850   if (rt == NULL || rt->choice == RnaFeatType_any) return TRUE;
1851   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
1852   if (rrp == NULL) return FALSE;
1853 
1854   rnaval = GetRnaValForRnaType (rt->choice);
1855   if (rnaval == rrp->type) {
1856     switch (rt->choice) {
1857       case RnaFeatType_ncRNA:
1858         if (rt->data.ptrvalue == NULL) {
1859           rval = TRUE;
1860         } else if ((rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL && StringCmp (rgp->_class, rt->data.ptrvalue) == 0) {
1861           rval = TRUE;
1862         }
1863         break;
1864       case RnaFeatType_tmRNA:
1865         rval = TRUE;
1866         break;
1867       case RnaFeatType_miscRNA:
1868         rval = TRUE;
1869         break;
1870       default:
1871         rval = TRUE;
1872         break;
1873     }
1874   }
1875   return rval;
1876 }
1877 
1878 
CompareRnaTypes(RnaFeatTypePtr rt1,RnaFeatTypePtr rt2)1879 static Int4 CompareRnaTypes (RnaFeatTypePtr rt1, RnaFeatTypePtr rt2)
1880 {
1881   Int4 rval = 0;
1882 
1883   if (rt1 == NULL && rt2 == NULL) {
1884     rval = 0;
1885   } else if (rt1 == NULL) {
1886     rval = -1;
1887   } else if (rt2 == NULL) {
1888     rval = 1;
1889   } else if (rt1->choice < rt2->choice) {
1890     rval = -1;
1891   } else if (rt1->choice > rt2->choice) {
1892     rval = 1;
1893   } else if (rt1->choice == RnaFeatType_ncRNA) {
1894     if (rt2->data.ptrvalue == NULL) {
1895       rval = 0;
1896     } else {
1897       rval = StringCmp (rt1->data.ptrvalue, rt2->data.ptrvalue);
1898     }
1899   } else {
1900     rval = 0;
1901   }
1902   return rval;
1903 }
1904 
1905 
RnaFeatTypeFromSeqFeat(SeqFeatPtr sfp)1906 static RnaFeatTypePtr RnaFeatTypeFromSeqFeat (SeqFeatPtr sfp)
1907 {
1908   RnaRefPtr rrp;
1909   RnaFeatTypePtr rt = NULL;
1910   RNAGenPtr rgp;
1911 
1912   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
1913     return NULL;
1914   }
1915 
1916   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
1917   switch (rrp->type) {
1918     case RNA_TYPE_premsg:
1919       rt = ValNodeNew (NULL);
1920       rt->choice = RnaFeatType_preRNA;
1921       break;
1922     case RNA_TYPE_mRNA:
1923       rt = ValNodeNew (NULL);
1924       rt->choice = RnaFeatType_mRNA;
1925       break;
1926     case RNA_TYPE_tRNA:
1927       rt = ValNodeNew (NULL);
1928       rt->choice = RnaFeatType_tRNA;
1929       break;
1930     case RNA_TYPE_rRNA:
1931       rt = ValNodeNew (NULL);
1932       rt->choice = RnaFeatType_rRNA;
1933       break;
1934     case RNA_TYPE_ncRNA:
1935       rt = ValNodeNew (NULL);
1936       rt->choice = RnaFeatType_ncRNA;
1937       if (rrp->ext.choice == 3) {
1938         rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
1939         if (rgp != NULL && !StringHasNoText (rgp->_class)) {
1940           rt->data.ptrvalue = StringSave (rgp->_class);
1941         }
1942       }
1943       break;
1944     case RNA_TYPE_tmRNA:
1945       rt = ValNodeNew (NULL);
1946       rt->choice = RnaFeatType_tmRNA;
1947       break;
1948     case RNA_TYPE_misc_RNA:
1949     case 255:
1950       rt = ValNodeNew (NULL);
1951       rt->choice = RnaFeatType_miscRNA;
1952       break;
1953   }
1954   return rt;
1955 }
1956 
1957 
1958 typedef struct rnafieldname {
1959   Int4 field;
1960   Int4    featqual;
1961   CharPtr fieldname;
1962 } RnaFieldNameData, PNTR RnaFieldNamePtr;
1963 
1964 static RnaFieldNameData rnafieldnames[] = {
1965  { Rna_field_product , Feat_qual_legal_product, "product" } ,
1966  { Rna_field_comment , Feat_qual_legal_note, "comment" } ,
1967  { Rna_field_codons_recognized , Feat_qual_legal_codons_recognized, "codons recognized" } ,
1968  { Rna_field_ncrna_class , Feat_qual_legal_ncRNA_class, "ncRNA class" } ,
1969  { Rna_field_tag_peptide , Feat_qual_legal_tag_peptide, "tag-peptide" } ,
1970  { Rna_field_anticodon , Feat_qual_legal_anticodon, "anticodon" } ,
1971  { Rna_field_transcript_id , Feat_qual_legal_transcript_id, "transcript ID" } ,
1972  { Rna_field_gene_locus , Feat_qual_legal_gene, "gene locus" } ,
1973  { Rna_field_gene_description , Feat_qual_legal_gene_description, "gene description" } ,
1974  { Rna_field_gene_maploc , Feat_qual_legal_map, "gene maploc" } ,
1975  { Rna_field_gene_locus_tag , Feat_qual_legal_locus_tag, "gene locus tag" } ,
1976  { Rna_field_gene_synonym , Feat_qual_legal_synonym, "gene synonym" } ,
1977  { Rna_field_gene_comment , Feat_qual_legal_gene_comment, "gene comment" }
1978 };
1979 
/* Number of rows in rnafieldnames.  Parenthesized so the expansion is a
 * single operand wherever it is used. */
#define NUM_rnafieldnames (sizeof (rnafieldnames) / sizeof (RnaFieldNameData))
1981 
1982 
GetNameForRnaField(Int4 rnafield)1983 NLM_EXTERN CharPtr GetNameForRnaField (Int4 rnafield)
1984 {
1985   Int4 i;
1986 
1987   for (i = 0; i < NUM_rnafieldnames; i++) {
1988     if (rnafieldnames[i].field == rnafield) {
1989       return rnafieldnames[i].fieldname;
1990     }
1991   }
1992   return NULL;
1993 }
1994 
1995 
GetRnaFieldForName(CharPtr fieldname)1996 static Int4 GetRnaFieldForName (CharPtr fieldname)
1997 {
1998   Int4 i;
1999 
2000   for (i = 0; i < NUM_rnafieldnames; i++) {
2001     if (StringCmp (rnafieldnames[i].fieldname, fieldname) == 0) {
2002       return rnafieldnames[i].field;
2003     }
2004   }
2005   return -1;
2006 }
2007 
2008 
GetRnaFieldList(void)2009 NLM_EXTERN ValNodePtr GetRnaFieldList (void)
2010 {
2011   ValNodePtr list = NULL;
2012   Int4       i;
2013 
2014   for (i = 0; i < NUM_rnafieldnames; i++) {
2015     ValNodeAddPointer (&list, rnafieldnames[i].field, StringSave (rnafieldnames[i].fieldname));
2016   }
2017   return list;
2018 }
2019 
2020 
/* Translates an RNA field value into the featqual stored in the same
 * rnafieldnames row; returns -1 when the field is not in the table. */
static Int4 GetFeatQualForRnaField (Int4 field)
{
  Int4    featqual = -1;
  Boolean found = FALSE;
  Int4    idx;

  for (idx = 0; !found && idx < NUM_rnafieldnames; idx++) {
    if (rnafieldnames[idx].field == field) {
      featqual = rnafieldnames[idx].featqual;
      found = TRUE;
    }
  }
  return featqual;
}
2032 
2033 
FeatureFieldFromRnaQual(RnaQualPtr rq)2034 NLM_EXTERN FeatureFieldPtr FeatureFieldFromRnaQual (RnaQualPtr rq)
2035 {
2036   FeatureFieldPtr ffp = NULL;
2037   Int4 type, qual;
2038 
2039   if (rq == NULL || rq->type == NULL) return NULL;
2040 
2041   type = GetFeatureTypeForRnaType (rq->type->choice);
2042   qual = GetFeatQualForRnaField (rq->field);
2043   if (type >= 0 && qual >= 0) {
2044     ffp = FeatureFieldNew ();
2045     ffp->type = type;
2046     ValNodeAddInt (&(ffp->field), FeatQualChoice_legal_qual, qual);
2047    }
2048   return ffp;
2049 }
2050 
2051 
RnaQualFromFeatureField(FeatureFieldPtr ffp)2052 NLM_EXTERN RnaQualPtr RnaQualFromFeatureField (FeatureFieldPtr ffp)
2053 {
2054   RnaQualPtr rq = NULL;
2055   Int4       i;
2056 
2057   if (ffp != NULL && ffp->field != NULL
2058       && ffp->field->choice == FeatQualChoice_legal_qual) {
2059     for (i = 0; i < NUM_rnafieldnames && rnafieldnames[i].featqual != ffp->field->choice; i++) {
2060     }
2061     if (i < NUM_rnafieldnames) {
2062       rq = RnaQualNew ();
2063       rq->field = rnafieldnames[i].featqual;
2064       rq->type = ValNodeNew (NULL);
2065       switch (ffp->type) {
2066         case Macro_feature_type_preRNA:
2067         case Macro_feature_type_precursor_RNA:
2068           rq->type->choice = RnaFeatType_preRNA;
2069           break;
2070         case Macro_feature_type_mRNA:
2071           rq->type->choice = RnaFeatType_mRNA;
2072           break;
2073         case Macro_feature_type_tRNA:
2074           rq->type->choice = RnaFeatType_tRNA;
2075           break;
2076         case Macro_feature_type_rRNA:
2077           rq->type->choice = RnaFeatType_rRNA;
2078           break;
2079         case Macro_feature_type_snRNA:
2080           rq->type->choice = RnaFeatType_ncRNA;
2081           rq->type->data.ptrvalue = StringSave ("snRNA");
2082           break;
2083         case Macro_feature_type_scRNA:
2084           rq->type->choice = RnaFeatType_ncRNA;
2085           rq->type->data.ptrvalue = StringSave ("scRNA");
2086           break;
2087         case Macro_feature_type_snoRNA:
2088           rq->type->choice = RnaFeatType_ncRNA;
2089           rq->type->data.ptrvalue = StringSave ("snoRNA");
2090           break;
2091         case Macro_feature_type_otherRNA:
2092         case Macro_feature_type_misc_RNA:
2093           rq->type->choice = RnaFeatType_miscRNA;
2094           break;
2095         case Macro_feature_type_ncRNA:
2096           rq->type->choice = RnaFeatType_ncRNA;
2097           break;
2098         case Macro_feature_type_tmRNA:
2099           rq->type->choice = RnaFeatType_tmRNA;
2100           break;
2101         default:
2102           rq = RnaQualFree (rq);
2103           break;
2104       }
2105     }
2106   }
2107   return rq;
2108 }
2109 
2110 
SummarizeRnaType(RnaFeatTypePtr rt)2111 NLM_EXTERN CharPtr SummarizeRnaType (RnaFeatTypePtr rt)
2112 {
2113   CharPtr rnatypename = NULL;
2114   CharPtr fmt = "%s ncRNA";
2115 
2116   if (rt == NULL || rt->choice == RnaFeatType_any) {
2117     rnatypename = StringSave ("Any RNA");
2118   } else if (rt->choice == RnaFeatType_ncRNA) {
2119     if (StringHasNoText (rt->data.ptrvalue)) {
2120       return StringSave ("ncRNA");
2121     } else {
2122       rnatypename = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (rt->data.ptrvalue)));
2123       sprintf (rnatypename, fmt, rt->data.ptrvalue);
2124     }
2125   } else {
2126     rnatypename = StringSave (GetNameForRnaType (rt->choice));
2127   }
2128   return rnatypename;
2129 }
2130 
2131 
SummarizeRnaQual(RnaQualPtr rq)2132 static CharPtr SummarizeRnaQual (RnaQualPtr rq)
2133 {
2134   CharPtr rnatypename, qualname;
2135   CharPtr any_fmt = "RNA %s";
2136   CharPtr fmt = "%s %s";
2137   CharPtr s = NULL;
2138 
2139   if (rq == NULL) return NULL;
2140 
2141   qualname = GetNameForRnaField (rq->field);
2142   if (qualname == NULL) {
2143     return NULL;
2144   }
2145 
2146   rnatypename = SummarizeRnaType (rq->type);
2147 
2148   if (rnatypename == NULL) {
2149     s = (CharPtr) MemNew (sizeof (Char) * (StringLen (any_fmt) + StringLen (qualname)));
2150     sprintf (s, any_fmt, qualname);
2151   } else {
2152     s = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (rnatypename) + StringLen (qualname)));
2153     sprintf (s, fmt, rnatypename, qualname);
2154     rnatypename = MemFree (rnatypename);
2155   }
2156   return s;
2157 }
2158 
2159 
SummarizeStructuredCommentField(StructuredCommentFieldPtr field)2160 static CharPtr SummarizeStructuredCommentField (StructuredCommentFieldPtr field)
2161 {
2162   CharPtr summ = NULL;
2163   CharPtr fmt = "structured comment field %s";
2164 
2165   if (field == NULL) return NULL;
2166 
2167   if (field->choice == StructuredCommentField_database) {
2168     summ = StringSave ("structured comment database");
2169   } else if (field->choice == StructuredCommentField_named) {
2170     summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field->data.ptrvalue)));
2171     sprintf (summ, fmt, field->data.ptrvalue == NULL ? "" : field->data.ptrvalue);
2172   }
2173   return summ;
2174 }
2175 
2176 
2177 #define IS_ORGMOD 1
2178 #define IS_SUBSRC 2
2179 #define IS_OTHER  3
2180 
2181 typedef struct srcqualscqual {
2182   Int4 srcqual;
2183   Int4 subtype;
2184   Int4 typeflag;
2185   Int4 subfield;
2186   CharPtr qualname;
2187 } SrcQualSCQualData, PNTR SrcQualSCQualPtr;
2188 
2189 #define kAllNotesStr "All Notes"
2190 #define kAllQualsStr "All"
2191 #define kAllPrimersStr "All Primers"
2192 
2193 static SrcQualSCQualData srcqual_scqual[] = {
2194  { Source_qual_acronym , ORGMOD_acronym , IS_ORGMOD , 0 , "acronym" } ,
2195  { Source_qual_anamorph , ORGMOD_anamorph , IS_ORGMOD , 0 , "anamorph" } ,
2196  { Source_qual_authority , ORGMOD_authority , IS_ORGMOD , 0 , "authority" } ,
2197  { Source_qual_bio_material , ORGMOD_bio_material , IS_ORGMOD , 0 , "bio-material" } ,
2198  { Source_qual_bio_material_INST , ORGMOD_bio_material , IS_ORGMOD , 1 , "bio-material-inst" } ,
2199  { Source_qual_bio_material_COLL , ORGMOD_bio_material , IS_ORGMOD , 2 , "bio-material-coll" } ,
2200  { Source_qual_bio_material_SpecID , ORGMOD_bio_material , IS_ORGMOD , 3 , "bio-material-specid" } ,
2201  { Source_qual_biotype , ORGMOD_biotype , IS_ORGMOD , 0 , "biotype" } ,
2202  { Source_qual_biovar , ORGMOD_biovar , IS_ORGMOD , 0 , "biovar" } ,
2203  { Source_qual_breed , ORGMOD_breed , IS_ORGMOD , 0 , "breed" } ,
2204  { Source_qual_cell_line , SUBSRC_cell_line , IS_SUBSRC , 0 , "cell-line" } ,
2205  { Source_qual_cell_type , SUBSRC_cell_type , IS_SUBSRC , 0 , "cell-type" } ,
2206  { Source_qual_chemovar , ORGMOD_chemovar , IS_ORGMOD , 0 , "chemovar" } ,
2207  { Source_qual_chromosome , SUBSRC_chromosome , IS_SUBSRC , 0 , "chromosome" } ,
2208  { Source_qual_clone , SUBSRC_clone , IS_SUBSRC , 0 , "clone" } ,
2209  { Source_qual_clone_lib , SUBSRC_clone_lib , IS_SUBSRC , 0 , "clone-lib" } ,
2210  { Source_qual_collected_by , SUBSRC_collected_by , IS_SUBSRC , 0 , "collected-by" } ,
2211  { Source_qual_collection_date , SUBSRC_collection_date , IS_SUBSRC , 0 , "collection-date" } ,
2212  { Source_qual_common , ORGMOD_common , IS_ORGMOD , 0 , "common" } ,
2213  { Source_qual_common_name , 0 , IS_OTHER , 0 , "common name" } ,
2214  { Source_qual_country , SUBSRC_country , IS_SUBSRC , 0 , "country" } ,
2215  { Source_qual_cultivar , ORGMOD_cultivar , IS_ORGMOD , 0 , "cultivar" } ,
2216  { Source_qual_culture_collection , ORGMOD_culture_collection , IS_ORGMOD , 0 , "culture-collection" } ,
2217  { Source_qual_culture_collection_INST , ORGMOD_culture_collection , IS_ORGMOD , 1 , "culture-collection-inst" } ,
2218  { Source_qual_culture_collection_COLL , ORGMOD_culture_collection , IS_ORGMOD , 2 , "culture-collection-coll" } ,
2219  { Source_qual_culture_collection_SpecID , ORGMOD_culture_collection , IS_ORGMOD , 3 , "culture-collection-specid" } ,
2220  { Source_qual_dbxref , 0 , IS_OTHER , 0 , "dbxref" } ,
2221  { Source_qual_dev_stage , SUBSRC_dev_stage , IS_SUBSRC , 0 , "dev-stage" } ,
2222  { Source_qual_division , 0 , IS_OTHER, 0 , "division" } ,
2223  { Source_qual_dosage , ORGMOD_dosage , IS_ORGMOD , 0 , "dosage" } ,
2224  { Source_qual_ecotype , ORGMOD_ecotype , IS_ORGMOD , 0 , "ecotype" } ,
2225  { Source_qual_endogenous_virus_name , SUBSRC_endogenous_virus_name , IS_SUBSRC , 0 , "endogenous-virus-name" } ,
2226  { Source_qual_environmental_sample , SUBSRC_environmental_sample , IS_SUBSRC , 0 , "environmental-sample" } ,
2227  { Source_qual_forma , ORGMOD_forma , IS_ORGMOD , 0 , "forma" } ,
2228  { Source_qual_forma_specialis , ORGMOD_forma_specialis , IS_ORGMOD , 0 , "forma-specialis" } ,
2229  { Source_qual_frequency , SUBSRC_frequency , IS_SUBSRC , 0 , "frequency" } ,
2230  { Source_qual_fwd_primer_name , SUBSRC_fwd_primer_name , IS_SUBSRC , 0 , "fwd-primer-name" } ,
2231  { Source_qual_fwd_primer_seq , SUBSRC_fwd_primer_seq , IS_SUBSRC , 0 , "fwd-primer-seq" } ,
2232  { Source_qual_gb_acronym , ORGMOD_gb_acronym , IS_ORGMOD , 0 , "gb-acronym" } ,
2233  { Source_qual_gb_anamorph , ORGMOD_gb_anamorph , IS_ORGMOD , 0 , "gb-anamorph" } ,
2234  { Source_qual_gb_synonym , ORGMOD_gb_synonym , IS_ORGMOD , 0 , "gb-synonym" } ,
2235  { Source_qual_genotype , SUBSRC_genotype , IS_SUBSRC , 0 , "genotype" } ,
2236  { Source_qual_germline , SUBSRC_germline , IS_SUBSRC , 0 , "germline" } ,
2237  { Source_qual_group , ORGMOD_group , IS_ORGMOD , 0 , "group" } ,
2238  { Source_qual_haplotype , SUBSRC_haplotype , IS_SUBSRC , 0 , "haplotype" } ,
2239  { Source_qual_identified_by , SUBSRC_identified_by , IS_SUBSRC , 0 , "identified-by" } ,
2240  { Source_qual_insertion_seq_name , SUBSRC_insertion_seq_name , IS_SUBSRC , 0 , "insertion-seq-name" } ,
2241  { Source_qual_isolate , ORGMOD_isolate , IS_ORGMOD , 0 , "isolate" } ,
2242  { Source_qual_isolation_source , SUBSRC_isolation_source , IS_SUBSRC , 0 , "isolation-source" } ,
2243  { Source_qual_lab_host , SUBSRC_lab_host , IS_SUBSRC , 0 , "lab-host" } ,
2244  { Source_qual_lat_lon , SUBSRC_lat_lon , IS_SUBSRC , 0 , "lat-lon" } ,
2245  { Source_qual_lineage , 0, IS_OTHER, 0 , "lineage" } ,
2246  { Source_qual_map , SUBSRC_map , IS_SUBSRC , 0 , "map" } ,
2247  { Source_qual_metagenome_source , ORGMOD_metagenome_source , IS_ORGMOD , 0 , "metagenome-source" } ,
2248  { Source_qual_metagenomic , SUBSRC_metagenomic , IS_SUBSRC , 0 , "metagenomic" } ,
2249  { Source_qual_old_lineage , ORGMOD_old_lineage , IS_ORGMOD , 0 , "old-lineage" } ,
2250  { Source_qual_old_name , ORGMOD_old_name , IS_ORGMOD , 0 , "old-name" } ,
2251  { Source_qual_orgmod_note , ORGMOD_other, IS_ORGMOD, 0 , "note-orgmod" } ,
2252  { Source_qual_pathovar , ORGMOD_pathovar , IS_ORGMOD , 0 , "pathovar" } ,
2253  { Source_qual_plasmid_name , SUBSRC_plasmid_name , IS_SUBSRC , 0 , "plasmid-name" } ,
2254  { Source_qual_plastid_name , SUBSRC_plastid_name , IS_SUBSRC , 0 , "plastid-name" } ,
2255  { Source_qual_pop_variant , SUBSRC_pop_variant , IS_SUBSRC , 0 , "pop-variant" } ,
2256  { Source_qual_rearranged , SUBSRC_rearranged , IS_SUBSRC , 0 , "rearranged" } ,
2257  { Source_qual_rev_primer_name , SUBSRC_rev_primer_name , IS_SUBSRC , 0 , "rev-primer-name" } ,
2258  { Source_qual_rev_primer_seq , SUBSRC_rev_primer_seq , IS_SUBSRC , 0 , "rev-primer-seq" } ,
2259  { Source_qual_segment , SUBSRC_segment , IS_SUBSRC , 0 , "segment" } ,
2260  { Source_qual_serogroup , ORGMOD_serogroup , IS_ORGMOD , 0 , "serogroup" } ,
2261  { Source_qual_serotype , ORGMOD_serotype , IS_ORGMOD , 0 , "serotype" } ,
2262  { Source_qual_serovar , ORGMOD_serovar , IS_ORGMOD , 0 , "serovar" } ,
2263  { Source_qual_sex , SUBSRC_sex , IS_SUBSRC , 0 , "sex" } ,
2264  { Source_qual_nat_host , ORGMOD_nat_host , IS_ORGMOD , 0 , "host" } ,
2265  { Source_qual_specimen_voucher , ORGMOD_specimen_voucher , IS_ORGMOD , 0 , "specimen-voucher" } ,
2266  { Source_qual_specimen_voucher_INST , ORGMOD_specimen_voucher , IS_ORGMOD , 1 , "specimen-voucher-inst" } ,
2267  { Source_qual_specimen_voucher_COLL , ORGMOD_specimen_voucher , IS_ORGMOD , 2 , "specimen-voucher-coll" } ,
2268  { Source_qual_specimen_voucher_SpecID , ORGMOD_specimen_voucher , IS_ORGMOD , 3 , "specimen-voucher-specid" } ,
2269  { Source_qual_strain , ORGMOD_strain , IS_ORGMOD , 0 , "strain" } ,
2270  { Source_qual_subclone , SUBSRC_subclone , IS_SUBSRC , 0 , "subclone" } ,
2271  { Source_qual_subgroup , ORGMOD_subgroup , IS_ORGMOD , 0 , "subgroup" } ,
2272  { Source_qual_subsource_note , SUBSRC_other , IS_SUBSRC , 0 , "note-subsrc" } ,
2273  { Source_qual_sub_species , ORGMOD_sub_species , IS_ORGMOD , 0 , "sub-species" } ,
2274  { Source_qual_substrain , ORGMOD_substrain , IS_ORGMOD , 0 , "substrain" } ,
2275  { Source_qual_subtype , ORGMOD_subtype , IS_ORGMOD , 0 , "subtype" } ,
2276  { Source_qual_synonym , ORGMOD_synonym , IS_ORGMOD , 0 , "synonym" } ,
2277  { Source_qual_taxname , 0 , IS_OTHER , 0 , "taxname" } ,
2278  { Source_qual_teleomorph , ORGMOD_teleomorph , IS_ORGMOD , 0 , "teleomorph" } ,
2279  { Source_qual_tissue_lib , SUBSRC_tissue_lib , IS_SUBSRC , 0 , "tissue-lib" } ,
2280  { Source_qual_tissue_type , SUBSRC_tissue_type , IS_SUBSRC , 0 , "tissue-type" } ,
2281  { Source_qual_transgenic , SUBSRC_transgenic , IS_SUBSRC , 0 , "transgenic" } ,
2282  { Source_qual_transposon_name , SUBSRC_transposon_name , IS_SUBSRC , 0 , "transposon-name" } ,
2283  { Source_qual_type , ORGMOD_type , IS_ORGMOD , 0 , "type" } ,
2284  { Source_qual_type_material , ORGMOD_type_material , IS_ORGMOD , 0 , "type-material" } ,
2285  { Source_qual_variety , ORGMOD_variety , IS_ORGMOD , 0 , "variety" } ,
2286  { Source_qual_all_notes , 255 , IS_OTHER , 0 , kAllNotesStr } ,
2287  { Source_qual_all_quals , 0 , IS_OTHER , 0, kAllQualsStr } ,
2288  { Source_qual_mating_type , SUBSRC_mating_type , IS_SUBSRC , 0 , "mating-type" } ,
2289  { Source_qual_linkage_group , SUBSRC_linkage_group , IS_SUBSRC , 0 , "linkage-group" } ,
2290  { Source_qual_haplogroup , SUBSRC_haplogroup, IS_SUBSRC, 0, "haplogroup"} ,
2291  { Source_qual_taxid , 0 , IS_OTHER , 0 , "taxid" } ,
2292  { Source_qual_all_primers , 0, IS_OTHER , 0, kAllPrimersStr } ,
2293  { Source_qual_altitude , SUBSRC_altitude, IS_SUBSRC , 0 , "altitude"}
2294 };
2295 
/* Number of rows in srcqual_scqual.  Parenthesized so the expansion is a
 * single operand wherever it is used. */
#define NUM_srcqual_scqual (sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData))
2297 
2298 static StringAliasData src_qual_alias_list[] = {
2299   {"organism", "taxname"},
2300   {"organism name", "taxname"},
2301   {"date", "collection-date"},
2302   {"voucher", "specimen-voucher"},
2303   {"specific-host", "host"},
2304   {"note sub-source", "note-subsrc"},
2305   { NULL, NULL}
2306 };
2307 
2308 
GetSubSrcQualFromSrcQual(Int4 srcqual,Int4Ptr subfield)2309 NLM_EXTERN Int4 GetSubSrcQualFromSrcQual (Int4 srcqual, Int4Ptr subfield)
2310 {
2311   Int4 i;
2312 
2313   for (i = 0; i < NUM_srcqual_scqual; i++) {
2314     if (srcqual == srcqual_scqual[i].srcqual) {
2315       if (srcqual_scqual[i].typeflag == IS_SUBSRC) {
2316         if (subfield != NULL) {
2317           *subfield = srcqual_scqual[i].subfield;
2318         }
2319         return srcqual_scqual[i].subtype;
2320       } else {
2321         return -1;
2322       }
2323     }
2324   }
2325   return -1;
2326 }
2327 
2328 
GetOrgModQualFromSrcQual(Int4 srcqual,Int4Ptr subfield)2329 NLM_EXTERN Int4 GetOrgModQualFromSrcQual (Int4 srcqual, Int4Ptr subfield)
2330 {
2331   Int4 i;
2332 
2333   for (i = 0; i < NUM_srcqual_scqual; i++) {
2334     if (srcqual == srcqual_scqual[i].srcqual) {
2335       if (srcqual_scqual[i].typeflag == IS_ORGMOD) {
2336         if (subfield != NULL) {
2337           *subfield = srcqual_scqual[i].subfield;
2338         }
2339         return srcqual_scqual[i].subtype;
2340       } else {
2341         return -1;
2342       }
2343     }
2344   }
2345   return -1;
2346 }
2347 
2348 
GetSrcQualFromSubSrcOrOrgMod(Int4 qual,Boolean is_org_mod)2349 NLM_EXTERN Int4 GetSrcQualFromSubSrcOrOrgMod (Int4 qual, Boolean is_org_mod)
2350 {
2351   Int4 i;
2352 
2353   for (i = 0; i < NUM_srcqual_scqual; i++) {
2354     if (qual == srcqual_scqual[i].subtype
2355         && ((is_org_mod && srcqual_scqual[i].typeflag == IS_ORGMOD)
2356         || (!is_org_mod && srcqual_scqual[i].typeflag == IS_SUBSRC))) {
2357       return srcqual_scqual[i].srcqual;
2358     }
2359   }
2360   return -1;
2361 }
2362 
2363 
IsNonTextSourceQual(Int4 srcqual)2364 NLM_EXTERN Boolean IsNonTextSourceQual (Int4 srcqual)
2365 {
2366   if (srcqual == Source_qual_transgenic
2367       || srcqual == Source_qual_germline
2368       || srcqual == Source_qual_metagenomic
2369       || srcqual == Source_qual_environmental_sample
2370       || srcqual == Source_qual_rearranged)
2371   {
2372     return TRUE;
2373   }
2374   else
2375   {
2376     return FALSE;
2377   }
2378 }
2379 
2380 
IsNonTextFieldType(FieldTypePtr field)2381 NLM_EXTERN Boolean IsNonTextFieldType (FieldTypePtr field)
2382 {
2383   ValNodePtr vnp;
2384 
2385   if (field == NULL) {
2386     return FALSE;
2387   } else if (field->choice == FieldType_molinfo_field) {
2388     return TRUE;
2389   } else if (field->choice != FieldType_source_qual) {
2390     return FALSE;
2391   } else if ((vnp = field->data.ptrvalue) == NULL) {
2392     return FALSE;
2393   } else if (vnp->choice != SourceQualChoice_textqual) {
2394     return FALSE;
2395   } else {
2396     return IsNonTextSourceQual (vnp->data.intvalue);
2397   }
2398 }
2399 
2400 
GetSourceQualName(Int4 srcqual)2401 NLM_EXTERN CharPtr GetSourceQualName (Int4 srcqual)
2402 {
2403   CharPtr str = NULL;
2404   Int4    i;
2405 
2406   for (i = 0; i < NUM_srcqual_scqual && str == NULL; i++) {
2407     if (srcqual_scqual[i].srcqual == srcqual) {
2408       str = srcqual_scqual[i].qualname;
2409     }
2410   }
2411   if (str == NULL) {
2412     str = "Unknown source qualifier";
2413   }
2414   return str;
2415 }
2416 
2417 
GetSourceQualTypeByName(CharPtr qualname)2418 NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname)
2419 {
2420   Int4    i;
2421 
2422   qualname = GetCanonical (qualname, src_qual_alias_list);
2423   for (i = 0; i < NUM_srcqual_scqual; i++) {
2424     if (Matchnamestring(srcqual_scqual[i].qualname, qualname)) {
2425       return srcqual_scqual[i].srcqual;
2426     }
2427   }
2428   if (StringICmp (qualname, "subsp.") == 0) {
2429     return Source_qual_sub_species;
2430   } else if (StringICmp (qualname, "var.") == 0) {
2431     return Source_qual_variety;
2432   } else if (StringICmp (qualname, "str.") == 0) {
2433     return Source_qual_strain;
2434   } else if (StringICmp (qualname, "note") == 0) {
2435     return Source_qual_orgmod_note;
2436   } else if (Matchnamestring (qualname, "latitude-longitude")
2437       || Matchnamestring (qualname, "lat-long")) {
2438     return Source_qual_lat_lon;
2439   }
2440   return -1;
2441 }
2442 
2443 
GetSourceQualList(Boolean for_remove)2444 NLM_EXTERN ValNodePtr GetSourceQualList (Boolean for_remove)
2445 {
2446   ValNodePtr list = NULL, tmp = NULL, last = NULL;
2447   Int4 i;
2448 
2449   if (for_remove) {
2450     ValNodeAddPointer (&list, 0, StringSave (kAllQualsStr));
2451     last = ValNodeAddPointer (&list, 0, StringSave (kAllNotesStr));
2452     last = ValNodeAddPointer (&list, 0, StringSave (kAllPrimersStr));
2453   }
2454   for (i = 0; i < NUM_srcqual_scqual; i++) {
2455     if (srcqual_scqual[i].srcqual != Source_qual_all_notes
2456         && srcqual_scqual[i].srcqual != Source_qual_all_quals
2457         && srcqual_scqual[i].srcqual != Source_qual_all_primers) {
2458       ValNodeAddPointer (&tmp, 0, StringSave (srcqual_scqual[i].qualname));
2459     }
2460   }
2461   tmp = ValNodeSort (tmp, SortVnpByString);
2462   if (last == NULL) {
2463     list = tmp;
2464   } else {
2465     last->next = tmp;
2466   }
2467   return list;
2468 }
2469 
2470 
GetSourceQualFieldListFromBioSource(BioSourcePtr biop)2471 NLM_EXTERN ValNodePtr GetSourceQualFieldListFromBioSource (BioSourcePtr biop)
2472 {
2473   SubSourcePtr ssp;
2474   OrgModPtr    mod;
2475   ValNodePtr   list = NULL, vnp;
2476   Int4         i;
2477   PCRReactionSetPtr ps;
2478   PCRPrimerPtr      pp;
2479 
2480   if (biop == NULL) {
2481     return NULL;
2482   }
2483 
2484   vnp = ValNodeNew (NULL);
2485   vnp->choice = SourceQualChoice_textqual;
2486   vnp->data.intvalue = Source_qual_taxname;
2487   ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2488 
2489   /* add other tax values */
2490   if (biop->org != NULL && !StringHasNoText (biop->org->common)) {
2491     vnp = ValNodeNew (NULL);
2492     vnp->choice = SourceQualChoice_textqual;
2493     vnp->data.intvalue = Source_qual_common_name;
2494     ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2495   }
2496   if (biop->org != NULL && biop->org->orgname != NULL) {
2497     if (!StringHasNoText (biop->org->orgname->lineage)) {
2498       vnp = ValNodeNew (NULL);
2499       vnp->choice = SourceQualChoice_textqual;
2500       vnp->data.intvalue = Source_qual_lineage;
2501       ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2502     }
2503     if (!StringHasNoText (biop->org->orgname->div)) {
2504       vnp = ValNodeNew (NULL);
2505       vnp->choice = SourceQualChoice_textqual;
2506       vnp->data.intvalue = Source_qual_division;
2507       ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2508     }
2509   }
2510 
2511   /* add taxid */
2512   if (HasTaxonomyID(biop)) {
2513     vnp = ValNodeNew (NULL);
2514     vnp->choice = SourceQualChoice_textqual;
2515     vnp->data.intvalue = Source_qual_taxid;
2516     ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2517   }
2518 
2519   /* add subtypes */
2520   for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
2521     for (i = 0;
2522          i < NUM_srcqual_scqual && (srcqual_scqual[i].typeflag != IS_SUBSRC || srcqual_scqual[i].subtype != ssp->subtype);
2523          i++) {}
2524     if (i < NUM_srcqual_scqual) {
2525       vnp = ValNodeNew (NULL);
2526       vnp->choice = SourceQualChoice_textqual;
2527       vnp->data.intvalue = srcqual_scqual[i].srcqual;
2528       ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2529     }
2530   }
2531   /* add orgmods */
2532   if (biop->org != NULL && biop->org->orgname != NULL) {
2533     for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
2534       for (i = 0;
2535           i < NUM_srcqual_scqual && (srcqual_scqual[i].typeflag != IS_ORGMOD || srcqual_scqual[i].subtype != mod->subtype);
2536           i++) {}
2537       if (i < NUM_srcqual_scqual) {
2538         vnp = ValNodeNew (NULL);
2539         vnp->choice = SourceQualChoice_textqual;
2540         vnp->data.intvalue = srcqual_scqual[i].srcqual;
2541         ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2542       }
2543     }
2544   }
2545 
2546   /* add PCR primers */
2547   for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
2548     for (pp = ps->forward; pp != NULL; pp = pp->next) {
2549       if (!StringHasNoText (pp->name)) {
2550         vnp = ValNodeNew (NULL);
2551         vnp->choice = SourceQualChoice_textqual;
2552         vnp->data.intvalue = Source_qual_fwd_primer_name;
2553         ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2554       }
2555       if (!StringHasNoText (pp->seq)) {
2556         vnp = ValNodeNew (NULL);
2557         vnp->choice = SourceQualChoice_textqual;
2558         vnp->data.intvalue = Source_qual_fwd_primer_seq;
2559         ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2560       }
2561     }
2562     for (pp = ps->reverse; pp != NULL; pp = pp->next) {
2563       if (!StringHasNoText (pp->name)) {
2564         vnp = ValNodeNew (NULL);
2565         vnp->choice = SourceQualChoice_textqual;
2566         vnp->data.intvalue = Source_qual_rev_primer_name;
2567         ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2568       }
2569       if (!StringHasNoText (pp->seq)) {
2570         vnp = ValNodeNew (NULL);
2571         vnp->choice = SourceQualChoice_textqual;
2572         vnp->data.intvalue = Source_qual_rev_primer_seq;
2573         ValNodeAddPointer (&list, FieldType_source_qual, vnp);
2574       }
2575     }
2576   }
2577 
2578   return list;
2579 }
2580 
2581 
AllowSourceQualMulti(SourceQualChoicePtr s)2582 NLM_EXTERN Boolean AllowSourceQualMulti (SourceQualChoicePtr s)
2583 {
2584   Boolean rval = FALSE;
2585 
2586   if (s == NULL || s->choice != SourceQualChoice_textqual || s->data.ptrvalue == NULL) {
2587     return FALSE;
2588   } else if (s->data.intvalue == Source_qual_culture_collection
2589              || s->data.intvalue == Source_qual_bio_material
2590              || s->data.intvalue == Source_qual_specimen_voucher
2591              || s->data.intvalue == Source_qual_dbxref
2592              || s->data.intvalue == Source_qual_fwd_primer_name
2593              || s->data.intvalue == Source_qual_fwd_primer_seq
2594              || s->data.intvalue == Source_qual_rev_primer_name
2595              || s->data.intvalue == Source_qual_rev_primer_seq) {
2596     rval = TRUE;
2597   }
2598   return rval;
2599 }
2600 
2601 
GetOrgModSearch(void)2602 NLM_EXTERN TextFsaPtr GetOrgModSearch (void)
2603 {
2604   TextFsaPtr tags;
2605 
2606   tags = TextFsaNew();
2607 
2608   TextFsaAdd (tags, "pathovar");
2609   TextFsaAdd (tags, "serovar");
2610   TextFsaAdd (tags, "strain");
2611   TextFsaAdd (tags, "sub-species");
2612   TextFsaAdd (tags, "variety");
2613 
2614   /* abbreviations */
2615   TextFsaAdd (tags, "subsp.");
2616   TextFsaAdd (tags, "var.");
2617   TextFsaAdd (tags, "str.");
2618 
2619   return tags;
2620 }
2621 
2622 
2623 typedef struct srclocgenome {
2624   Int4 srcloc;
2625   Int4 genome;
2626   CharPtr name;
2627 } SrcLocGenomeData, PNTR SrcLocGenomePtr;
2628 
2629 static SrcLocGenomeData srcloc_genome[] = {
2630  { Source_location_unknown , GENOME_unknown , " " } ,
2631  { Source_location_genomic , GENOME_genomic , "genomic" } ,
2632  { Source_location_chloroplast , GENOME_chloroplast , "chloroplast" } ,
2633  { Source_location_chromoplast , GENOME_chromoplast , "chromoplast" } ,
2634  { Source_location_kinetoplast , GENOME_kinetoplast , "kinetoplast" } ,
2635  { Source_location_mitochondrion , GENOME_mitochondrion , "mitochondrion" } ,
2636  { Source_location_plastid , GENOME_plastid , "plastid" } ,
2637  { Source_location_macronuclear , GENOME_macronuclear , "macronuclear" } ,
2638  { Source_location_extrachrom , GENOME_extrachrom , "extrachromosomal" } ,
2639  { Source_location_plasmid , GENOME_plasmid , "plasmid" } ,
2640  { Source_location_transposon , GENOME_transposon , "transposon" } ,
2641  { Source_location_insertion_seq , GENOME_insertion_seq , "insertion-seq" } ,
2642  { Source_location_cyanelle , GENOME_cyanelle , "cyanelle" } ,
2643  { Source_location_proviral , GENOME_proviral , "proviral" } ,
2644  { Source_location_virion , GENOME_virion , "virion" } ,
2645  { Source_location_nucleomorph , GENOME_nucleomorph , "nucleomorph" } ,
2646  { Source_location_apicoplast , GENOME_apicoplast , "apicoplast" } ,
2647  { Source_location_leucoplast , GENOME_leucoplast , "leucoplast" } ,
2648  { Source_location_proplastid , GENOME_proplastid , "proplastid" } ,
2649  { Source_location_endogenous_virus , GENOME_endogenous_virus , "endogenous-virus" } ,
2650  { Source_location_hydrogenosome , GENOME_hydrogenosome , "hydrogenosome" } ,
2651  { Source_location_chromosome , GENOME_chromosome , "chromosome" } ,
2652  { Source_location_chromatophore , GENOME_chromatophore , "chromatophore" } };
2653 
2654 #define NUM_srcloc_genome sizeof (srcloc_genome) / sizeof (SrcLocGenomeData)
2655 
GenomeFromSrcLoc(Int4 srcloc)2656 NLM_EXTERN Int4 GenomeFromSrcLoc (Int4 srcloc)
2657 {
2658   Int4 i;
2659 
2660   for (i = 0; i < NUM_srcloc_genome; i++) {
2661     if (srcloc_genome[i].srcloc == srcloc) {
2662       return srcloc_genome[i].genome;
2663     }
2664   }
2665   return -1;
2666 }
2667 
2668 
SrcLocFromGenome(Int4 genome)2669 NLM_EXTERN Int4 SrcLocFromGenome (Int4 genome)
2670 {
2671   Int4 i;
2672 
2673   for (i = 0; i < NUM_srcloc_genome; i++) {
2674     if (srcloc_genome[i].genome == genome) {
2675       return srcloc_genome[i].srcloc;
2676     }
2677   }
2678   return -1;
2679 }
2680 
2681 
2682 
LocNameFromGenome(Int4 genome)2683 NLM_EXTERN CharPtr LocNameFromGenome (Int4 genome)
2684 {
2685   Int4 i;
2686 
2687   for (i = 0; i < NUM_srcloc_genome; i++) {
2688     if (srcloc_genome[i].genome == genome) {
2689       return srcloc_genome[i].name;
2690     }
2691   }
2692   return NULL;
2693 }
2694 
2695 
GenomeFromLocName(CharPtr loc_name)2696 NLM_EXTERN Int4 GenomeFromLocName (CharPtr loc_name)
2697 {
2698   Int4 i;
2699 
2700   for (i = 0; i < NUM_srcloc_genome; i++) {
2701     if (StringICmp (srcloc_genome[i].name, loc_name) == 0) {
2702       return srcloc_genome[i].genome;
2703     }
2704   }
2705   return -1;
2706 }
2707 
2708 
GetLocationList(Boolean for_remove)2709 NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
2710 {
2711   ValNodePtr list = NULL, start = NULL;
2712   Int4 i;
2713 
2714   for (i = 0; i < NUM_srcloc_genome; i++) {
2715     if (for_remove && srcloc_genome[i].srcloc == Source_location_unknown) {
2716       ValNodeAddPointer (&list, srcloc_genome[i].srcloc, StringSave ("any"));
2717     } else {
2718       ValNodeAddPointer (&list, srcloc_genome[i].srcloc, StringSave (srcloc_genome[i].name));
2719     }
2720   }
2721   list = ValNodeSort (list, SortVnpByString);
2722   /* put mitochondrion and chloroplast at top of list */
2723   ValNodeAddPointer (&start, Source_location_mitochondrion, StringSave ("mitochondrion"));
2724   ValNodeAddPointer (&start, Source_location_chloroplast, StringSave ("chloroplast"));
2725   ValNodeLink (&start, list);
2726   list = start;
2727   return list;
2728 }
2729 
2730 
SrcLocationFieldFromValue(CharPtr value)2731 static ValNodePtr SrcLocationFieldFromValue (CharPtr value)
2732 {
2733   ValNodePtr field, sq;
2734   Int4 genome;
2735 
2736   genome = GenomeFromLocName(value);
2737   if (genome < 0) {
2738     return NULL;
2739   }
2740   sq = ValNodeNew (NULL);
2741   sq->choice = SourceQualValChoice_location;
2742   sq->data.intvalue = genome;
2743   field = ValNodeNew (NULL);
2744   field->choice = FieldType_source_qual;
2745   field->data.ptrvalue = sq;
2746   return field;
2747 }
2748 
2749 
2750 typedef struct srcorigorigin {
2751   Int4 srcorig;
2752   Int4 origin;
2753   CharPtr name;
2754 } SrcOrigOriginData, PNTR SrcrigOriginPtr;
2755 
2756 static SrcOrigOriginData srcorig_origin[] = {
2757  { Source_origin_unknown , 0 , "unknown" } ,
2758  { Source_origin_natural , 1 , "natural" } ,
2759  { Source_origin_natmut , 2 , "natmut" } ,
2760  { Source_origin_mut , 3 , "mut" } ,
2761  { Source_origin_artificial , 4 , "artificial" } ,
2762  { Source_origin_synthetic , 5 , "synthetic" } ,
2763  { Source_origin_other , 255 , "other" } };
2764 
2765 #define NUM_srcorig_origin sizeof (srcorig_origin) / sizeof (SrcOrigOriginData)
2766 
OriginFromSrcOrig(Int4 srcorig)2767 NLM_EXTERN Int4 OriginFromSrcOrig (Int4 srcorig)
2768 {
2769   Int4 i;
2770 
2771   for (i = 0; i < NUM_srcorig_origin; i++) {
2772     if (srcorig_origin[i].srcorig == srcorig) {
2773       return srcorig_origin[i].origin;
2774     }
2775   }
2776   return -1;
2777 }
2778 
2779 
SrcOrigFromOrigin(Int4 origin)2780 NLM_EXTERN Int4 SrcOrigFromOrigin (Int4 origin)
2781 {
2782   Int4 i;
2783 
2784   for (i = 0; i < NUM_srcorig_origin; i++) {
2785     if (srcorig_origin[i].origin == origin) {
2786       return srcorig_origin[i].srcorig;
2787     }
2788   }
2789   return -1;
2790 }
2791 
2792 
OriginNameFromOrigin(Int4 origin)2793 NLM_EXTERN CharPtr OriginNameFromOrigin (Int4 origin)
2794 {
2795   Int4 i;
2796 
2797   for (i = 0; i < NUM_srcorig_origin; i++) {
2798     if (srcorig_origin[i].origin == origin) {
2799       return srcorig_origin[i].name;
2800     }
2801   }
2802   return NULL;
2803 }
2804 
2805 
OriginFromOriginName(CharPtr origin_name)2806 static Int4 OriginFromOriginName (CharPtr origin_name)
2807 {
2808   Int4 i;
2809 
2810   for (i = 0; i < NUM_srcorig_origin; i++) {
2811     if (StringCmp (srcorig_origin[i].name, origin_name) == 0) {
2812       return srcorig_origin[i].origin;
2813     }
2814   }
2815   return -1;
2816 }
2817 
2818 
GetOriginList(Boolean for_remove)2819 NLM_EXTERN ValNodePtr GetOriginList (Boolean for_remove)
2820 {
2821   ValNodePtr list = NULL;
2822   Int4 i;
2823 
2824   for (i = 0; i < NUM_srcorig_origin; i++) {
2825     if (for_remove && srcorig_origin[i].srcorig == Source_origin_unknown) {
2826       ValNodeAddPointer (&list, srcorig_origin[i].srcorig, StringSave ("any"));
2827     } else {
2828       ValNodeAddPointer (&list, srcorig_origin[i].srcorig, StringSave (srcorig_origin[i].name));
2829     }
2830   }
2831   return list;
2832 }
2833 
2834 
2835 /* special code for converting source features to source qualifier val lists */
/* Appends a SourceQualValChoice_textqual entry to *fields.  The entry owns
 * a new SourceQualTextVal holding srcqual and a saved copy of val.
 */
static void SetSrcQualTextValue (ValNodePtr PNTR fields, Int4 srcqual, CharPtr val)
{
  SourceQualTextValPtr text_val = SourceQualTextValNew ();

  text_val->val = StringSave (val);
  text_val->srcqual = srcqual;
  ValNodeAddPointer (fields, SourceQualValChoice_textqual, text_val);
}
2845 
2846 
SourceQualValsFromOrgMods(OrgModPtr mod)2847 static ValNodePtr SourceQualValsFromOrgMods (OrgModPtr mod)
2848 {
2849   Int4 src_qual;
2850   ValNodePtr fields = NULL;
2851 
2852   while (mod != NULL) {
2853     src_qual = GetSrcQualFromSubSrcOrOrgMod (mod->subtype, TRUE);
2854     if (src_qual > -1) {
2855       SetSrcQualTextValue (&fields, src_qual, mod->subname);
2856     }
2857     mod = mod->next;
2858   }
2859   return fields;
2860 }
2861 
2862 
SourceQualValsFromSubSrcs(SubSourcePtr ssp)2863 static ValNodePtr SourceQualValsFromSubSrcs (SubSourcePtr ssp)
2864 {
2865   Int4 src_qual;
2866   ValNodePtr fields = NULL;
2867 
2868   while (ssp != NULL) {
2869     src_qual = GetSrcQualFromSubSrcOrOrgMod (ssp->subtype, FALSE);
2870     if (src_qual > -1) {
2871       SetSrcQualTextValue (&fields, src_qual, ssp->name);
2872     }
2873     ssp = ssp->next;
2874   }
2875   return fields;
2876 }
2877 
2878 
/* Returns a new list with one Source_qual_synonym text entry for every node
 * in syn; each node's data pointer is used as the synonym text.
 */
static ValNodePtr SourceQualValsFromSynonyms (ValNodePtr syn)
{
  ValNodePtr result = NULL;
  ValNodePtr cur;

  for (cur = syn; cur != NULL; cur = cur->next) {
    SetSrcQualTextValue (&result, Source_qual_synonym, cur->data.ptrvalue);
  }
  return result;
}
2889 
2890 
2891 NLM_EXTERN CharPtr GetDbtagString (DbtagPtr db_tag);
2892 
/* Returns a new list with one Source_qual_dbxref text entry for every node
 * in dbxref; each node's data pointer is passed to GetDbtagString to obtain
 * the text.
 *
 * SetSrcQualTextValue stores its own copy of the text (via StringSave), so
 * the string obtained from GetDbtagString is released after each use.
 * NOTE(review): this assumes GetDbtagString returns a caller-owned
 * allocation - confirm against its definition.
 */
static ValNodePtr SourceQualValsFromDbxrefs (ValNodePtr dbxref)
{
  ValNodePtr fields = NULL;
  CharPtr tmp;

  while (dbxref != NULL) {
    tmp = GetDbtagString (dbxref->data.ptrvalue);
    SetSrcQualTextValue (&fields, Source_qual_dbxref, tmp);
    /* the list holds a copy; free the temporary to avoid leaking it */
    tmp = MemFree (tmp);
    dbxref = dbxref->next;
  }
  return fields;
}
2905 
2906 
SourceQualValsFromBioSourcePtr(BioSourcePtr biop)2907 NLM_EXTERN ValNodePtr SourceQualValsFromBioSourcePtr (BioSourcePtr biop)
2908 {
2909   ValNodePtr fields = NULL;
2910   Int4 loc, origin;
2911 
2912   if (biop == NULL) {
2913     return NULL;
2914   }
2915 
2916   ValNodeLink (&fields, SourceQualValsFromSubSrcs (biop->subtype));
2917 
2918   /* genome */
2919   if (biop->genome != GENOME_unknown) {
2920     loc = SrcLocFromGenome (biop->genome);
2921     if (loc > -1) {
2922       ValNodeAddInt (&fields, SourceQualValChoice_location, loc);
2923     }
2924   }
2925   /* origin */
2926   if (biop->origin > 0) {
2927     origin = SrcOrigFromOrigin (biop->origin);
2928     if (origin > -1) {
2929       ValNodeAddInt (&fields, SourceQualValChoice_origin, origin);
2930     }
2931   }
2932   /* TODO: need focus */
2933 
2934 
2935   if (biop->org != NULL) {
2936     if (!StringHasNoText (biop->org->taxname)) {
2937       SetSrcQualTextValue (&fields, Source_qual_taxname, biop->org->taxname);
2938     }
2939     /* need common */
2940     if (!StringHasNoText (biop->org->common)) {
2941       SetSrcQualTextValue (&fields, Source_qual_common, biop->org->common);
2942     }
2943     /* dbxrefs */
2944     ValNodeLink (&fields, SourceQualValsFromDbxrefs (biop->org->db));
2945 
2946     /* add synonyms */
2947     SourceQualValsFromSynonyms (biop->org->syn);
2948 
2949     if (biop->org->orgname != NULL) {
2950       ValNodeLink (&fields, SourceQualValsFromOrgMods (biop->org->orgname->mod));
2951 
2952       /* lineage */
2953       if (!StringHasNoText (biop->org->orgname->lineage)) {
2954         SetSrcQualTextValue (&fields, Source_qual_lineage, biop->org->orgname->lineage);
2955       }
2956       /* div */
2957       if (!StringHasNoText (biop->org->orgname->div)) {
2958         SetSrcQualTextValue (&fields, Source_qual_division, biop->org->orgname->div);
2959       }
2960 
2961       /* gcode, mgcode */
2962       if (biop->org->orgname->gcode > 0) {
2963         ValNodeAddInt (&fields, SourceQualChoice_gcode, biop->org->orgname->gcode);
2964       }
2965       if (biop->org->orgname->mgcode > 0) {
2966         ValNodeAddInt (&fields, SourceQualChoice_mgcode, biop->org->orgname->mgcode);
2967       }
2968 
2969     }
2970 
2971   }
2972 
2973   return fields;
2974 }
2975 
2976 
/* Applies one source-qualifier value (src_qual, tagged with a
 * SourceQualValChoice_* value) to biop by translating it into the matching
 * SourceQualChoice_* node and calling SetSourceQualInBioSource.
 *
 * Text qualifiers that AllowSourceQualMulti accepts are added as an extra
 * qualifier; every other value replaces the existing one.  Does nothing
 * when biop or src_qual is NULL, when a text value has no payload, or when
 * the choice is not one of the handled kinds.
 */
static void SetSourceQualValOnBioSource (BioSourcePtr biop, ValNodePtr src_qual)
{
  ValNode vn;
  SourceQualTextValPtr st;

  if (biop == NULL || src_qual == NULL) {
    return;
  }

  vn.next = NULL;
  /* clear the whole data union before an int is stored into it */
  vn.data.ptrvalue = NULL;

  switch (src_qual->choice) {
    case SourceQualValChoice_textqual:
      st = (SourceQualTextValPtr) src_qual->data.ptrvalue;
      if (st != NULL) {
        vn.choice = SourceQualChoice_textqual;
        vn.data.intvalue = st->srcqual;
        /* Ask about vn, not src_qual: src_qual's payload is a pointer to
         * st, while AllowSourceQualMulti compares the node's int value
         * against qualifier IDs. */
        if (AllowSourceQualMulti (&vn)) {
          SetSourceQualInBioSource (biop, &vn, NULL, st->val, ExistingTextOption_add_qual);
        } else {
          SetSourceQualInBioSource (biop, &vn, NULL, st->val, ExistingTextOption_replace_old);
        }
      }
      return;
    case SourceQualValChoice_location:
      vn.choice = SourceQualChoice_location;
      break;
    case SourceQualValChoice_origin:
      vn.choice = SourceQualChoice_origin;
      break;
    case SourceQualValChoice_gcode:
      vn.choice = SourceQualChoice_gcode;
      break;
    case SourceQualValChoice_mgcode:
      vn.choice = SourceQualChoice_mgcode;
      break;
    default:
      /* unhandled kind of value: nothing to apply */
      return;
  }

  /* all remaining kinds carry a plain integer and replace the old value */
  vn.data.intvalue = src_qual->data.intvalue;
  SetSourceQualInBioSource (biop, &vn, NULL, NULL, ExistingTextOption_replace_old);
}
3022 
3023 
BioSourceFromSourceQualVals(ValNodePtr fields)3024 NLM_EXTERN BioSourcePtr BioSourceFromSourceQualVals (ValNodePtr fields)
3025 {
3026   BioSourcePtr biop = NULL;
3027   ValNodePtr vnp;
3028 
3029   if (fields != NULL) {
3030     biop = BioSourceNew ();
3031 
3032     for (vnp = fields; vnp != NULL; vnp = vnp->next) {
3033       SetSourceQualValOnBioSource (biop, vnp);
3034     }
3035   }
3036   return biop;
3037 }
3038 
3039 
3040 
3041 
3042 
3043 typedef struct cdsgeneprotfieldname {
3044   Int4 field;
3045   CharPtr name;
3046 } CDSGeneProtFieldNameData, PNTR CDSGeneProtFieldNamePtr;
3047 
3048 static CDSGeneProtFieldNameData cdsgeneprotfield_name[] = {
3049 { CDSGeneProt_field_cds_comment , "CDS comment" } ,
3050 { CDSGeneProt_field_cds_inference , "CDS inference" } ,
3051 { CDSGeneProt_field_codon_start , "codon-start" } ,
3052 { CDSGeneProt_field_gene_locus , "gene locus" } ,
3053 { CDSGeneProt_field_gene_description , "gene description" } ,
3054 { CDSGeneProt_field_gene_comment , "gene comment" } ,
3055 { CDSGeneProt_field_gene_inference, "gene inference" } ,
3056 { CDSGeneProt_field_gene_allele , "gene allele" } ,
3057 { CDSGeneProt_field_gene_maploc , "gene maploc" } ,
3058 { CDSGeneProt_field_gene_locus_tag , "gene locus tag" } ,
3059 { CDSGeneProt_field_gene_synonym , "gene synonym" } ,
3060 { CDSGeneProt_field_gene_old_locus_tag , "gene old locus tag" } ,
3061 { CDSGeneProt_field_mrna_product , "mRNA product" } ,
3062 { CDSGeneProt_field_mrna_comment , "mRNA comment" } ,
3063 { CDSGeneProt_field_prot_name , "protein name" } ,
3064 { CDSGeneProt_field_prot_description , "protein description" } ,
3065 { CDSGeneProt_field_prot_ec_number , "protein EC number" } ,
3066 { CDSGeneProt_field_prot_activity , "protein activity" } ,
3067 { CDSGeneProt_field_prot_comment , "protein comment" } ,
3068 { CDSGeneProt_field_mat_peptide_name , "mat-peptide name" } ,
3069 { CDSGeneProt_field_mat_peptide_description ,  "mat-peptide description" } ,
3070 { CDSGeneProt_field_mat_peptide_ec_number , "mat-peptide EC number" } ,
3071 { CDSGeneProt_field_mat_peptide_activity , "mat-peptide activity" } ,
3072 { CDSGeneProt_field_mat_peptide_comment , "mat-peptide comment" } };
3073 
3074 #define NUM_cdsgeneprotfield_name sizeof (cdsgeneprotfield_name) / sizeof (CDSGeneProtFieldNameData)
3075 
CDSGeneProtNameFromField(Int4 field)3076 NLM_EXTERN CharPtr CDSGeneProtNameFromField (Int4 field)
3077 {
3078   Int4 i;
3079 
3080   for (i = 0; i < NUM_cdsgeneprotfield_name; i++) {
3081     if (cdsgeneprotfield_name[i].field == field) {
3082       return cdsgeneprotfield_name[i].name;
3083     }
3084   }
3085   return NULL;
3086 }
3087 
3088 
CDSGeneProtFieldFromName(CharPtr str)3089 static Int4 CDSGeneProtFieldFromName (CharPtr str)
3090 {
3091   Int4 i;
3092 
3093   for (i = 0; i < NUM_cdsgeneprotfield_name; i++) {
3094     if (Matchnamestring (cdsgeneprotfield_name[i].name, str)) {
3095       return cdsgeneprotfield_name[i].field;
3096     }
3097   }
3098   return -1;
3099 }
3100 
3101 
AddAllCDSGeneProtFieldsToChoiceList(ValNodePtr PNTR field_list)3102 NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list)
3103 {
3104   Int4 i;
3105 
3106   ValNodeAddPointer (field_list, CDSGeneProt_field_prot_name, StringSave ("protein name"));
3107   ValNodeAddPointer (field_list, CDSGeneProt_field_prot_description, StringSave ("protein description"));
3108 
3109   for (i = 0; i < NUM_cdsgeneprotfield_name; i++) {
3110     ValNodeAddPointer (field_list, cdsgeneprotfield_name[i].field, StringSave (cdsgeneprotfield_name[i].name));
3111   }
3112 }
3113 
3114 
MakeCDSGeneProtFieldTypeList(void)3115 static ValNodePtr MakeCDSGeneProtFieldTypeList (void)
3116 {
3117   Int4 i;
3118   ValNodePtr field_list = NULL;
3119 
3120   for (i = 0; i < NUM_cdsgeneprotfield_name; i++) {
3121     ValNodeAddInt (&field_list, FieldType_cds_gene_prot, cdsgeneprotfield_name[i].field);
3122   }
3123   return field_list;
3124 }
3125 
3126 
3127 typedef struct cdsgeneprotfeatname {
3128   Int4 feature_type;
3129   CharPtr name;
3130 } CDSGeneProtFeatNameData, PNTR CDSGeneProtFeatNamePtr;
3131 
3132 static CDSGeneProtFeatNameData cdsgeneprotfeat_name[] = {
3133 { CDSGeneProt_feature_type_constraint_gene , "gene" } ,
3134 { CDSGeneProt_feature_type_constraint_mRNA , "mRNA" } ,
3135 { CDSGeneProt_feature_type_constraint_cds , "CDS" } ,
3136 { CDSGeneProt_feature_type_constraint_prot , "protein" } ,
3137 { CDSGeneProt_feature_type_constraint_mat_peptide , "mat-peptide" }};
3138 
3139 #define NUM_cdsgeneprotfeat_name sizeof (cdsgeneprotfeat_name) / sizeof (CDSGeneProtFeatNameData)
3140 
CDSGeneProtFeatureNameFromFeatureType(Int4 feature_type)3141 NLM_EXTERN CharPtr CDSGeneProtFeatureNameFromFeatureType (Int4 feature_type)
3142 {
3143   Int4 i;
3144 
3145   for (i = 0; i < NUM_cdsgeneprotfeat_name; i++) {
3146     if (cdsgeneprotfeat_name[i].feature_type == feature_type) {
3147       return cdsgeneprotfeat_name[i].name;
3148     }
3149   }
3150   return NULL;
3151 }
3152 
3153 
AddAllCDSGeneProtFeaturesToChoiceList(ValNodePtr PNTR field_list)3154 NLM_EXTERN void AddAllCDSGeneProtFeaturesToChoiceList (ValNodePtr PNTR field_list)
3155 {
3156   Int4 i;
3157 
3158   for (i = 0; i < NUM_cdsgeneprotfeat_name; i++) {
3159     ValNodeAddPointer (field_list, cdsgeneprotfeat_name[i].feature_type, StringSave (cdsgeneprotfeat_name[i].name));
3160   }
3161 }
3162 
3163 
IsCDSGeneProtFieldMatPeptideRelated(Int4 val)3164 static Boolean IsCDSGeneProtFieldMatPeptideRelated (Int4 val)
3165 {
3166   if (val == CDSGeneProt_field_mat_peptide_name
3167       || val == CDSGeneProt_field_mat_peptide_description
3168       || val == CDSGeneProt_field_mat_peptide_ec_number
3169       || val == CDSGeneProt_field_mat_peptide_activity
3170       || val == CDSGeneProt_field_mat_peptide_comment) {
3171     return TRUE;
3172   } else {
3173     return FALSE;
3174   }
3175 }
3176 
3177 
IsFieldTypeMatPeptideRelated(FieldTypePtr field)3178 static Boolean IsFieldTypeMatPeptideRelated (FieldTypePtr field)
3179 {
3180   Boolean rval = FALSE;
3181   FeatureFieldPtr ff;
3182 
3183   if (field == NULL) {
3184     rval = FALSE;
3185   } else if ((field->choice == FieldType_feature_field
3186        && (ff = field->data.ptrvalue) != NULL
3187        && ff->type == Macro_feature_type_mat_peptide_aa)
3188       || (field->choice == FieldType_cds_gene_prot
3189           && IsCDSGeneProtFieldMatPeptideRelated(field->data.intvalue))) {
3190     rval = TRUE;
3191   } else {
3192     rval = FALSE;
3193   }
3194   return rval;
3195 }
3196 
3197 
IsConstraintChoiceMatPeptideRelated(ConstraintChoicePtr constraint)3198 static Boolean IsConstraintChoiceMatPeptideRelated (ConstraintChoicePtr constraint)
3199 {
3200   CDSGeneProtQualConstraintPtr cq;
3201   FieldConstraintPtr fq;
3202   Boolean            rval = FALSE;
3203 
3204   if (constraint == NULL) {
3205     rval = FALSE;
3206   } else if (constraint->choice == ConstraintChoice_cdsgeneprot_qual) {
3207     cq = (CDSGeneProtQualConstraintPtr) constraint->data.ptrvalue;
3208     if (cq != NULL && cq->field1 != NULL
3209         && IsCDSGeneProtFieldMatPeptideRelated (cq->field1->data.intvalue)) {
3210       rval = TRUE;
3211     } else {
3212       rval = FALSE;
3213     }
3214   } else if (constraint->choice == ConstraintChoice_field) {
3215     fq = (FieldConstraintPtr) constraint->data.ptrvalue;
3216     if (fq != NULL && IsFieldTypeMatPeptideRelated (fq->field)) {
3217       rval = TRUE;
3218     } else {
3219       rval = FALSE;
3220     }
3221   } else {
3222     rval = FALSE;
3223   }
3224   return rval;
3225 }
3226 
3227 
FeatureTypeFromCDSGeneProtField(Uint2 cds_gene_prot_field)3228 static Int2 FeatureTypeFromCDSGeneProtField (Uint2 cds_gene_prot_field)
3229 {
3230   Int2 feat_type = Macro_feature_type_any;
3231 
3232   switch (cds_gene_prot_field) {
3233     case CDSGeneProt_field_cds_comment:
3234     case CDSGeneProt_field_cds_inference:
3235     case CDSGeneProt_field_codon_start:
3236       feat_type = Macro_feature_type_cds;
3237       break;
3238     case CDSGeneProt_field_gene_locus:
3239     case CDSGeneProt_field_gene_description:
3240     case CDSGeneProt_field_gene_comment:
3241     case CDSGeneProt_field_gene_allele:
3242     case CDSGeneProt_field_gene_maploc:
3243     case CDSGeneProt_field_gene_locus_tag:
3244     case CDSGeneProt_field_gene_synonym:
3245     case CDSGeneProt_field_gene_old_locus_tag:
3246     case CDSGeneProt_field_gene_inference:
3247       feat_type = Macro_feature_type_gene;
3248       break;
3249     case CDSGeneProt_field_mrna_product:
3250     case CDSGeneProt_field_mrna_comment:
3251       feat_type = Macro_feature_type_mRNA;
3252       break;
3253     case CDSGeneProt_field_prot_name:
3254     case CDSGeneProt_field_prot_description:
3255     case CDSGeneProt_field_prot_ec_number:
3256     case CDSGeneProt_field_prot_activity:
3257     case CDSGeneProt_field_prot_comment:
3258       feat_type = Macro_feature_type_prot;
3259       break;
3260     case CDSGeneProt_field_mat_peptide_name:
3261     case CDSGeneProt_field_mat_peptide_description:
3262     case CDSGeneProt_field_mat_peptide_ec_number:
3263     case CDSGeneProt_field_mat_peptide_activity:
3264     case CDSGeneProt_field_mat_peptide_comment:
3265       feat_type = Macro_feature_type_mat_peptide_aa;
3266       break;
3267   }
3268   return feat_type;
3269 }
3270 
3271 
FeatureFieldFromCDSGeneProtField(Uint2 cds_gene_prot_field)3272 NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot_field)
3273 {
3274   FeatureFieldPtr f = NULL;
3275 
3276   switch (cds_gene_prot_field) {
3277     case CDSGeneProt_field_cds_comment:
3278       f = FeatureFieldNew ();
3279       f->type = Macro_feature_type_cds;
3280       f->field = ValNodeNew (NULL);
3281       f->field->choice = FeatQualChoice_legal_qual;
3282       f->field->data.intvalue = Feat_qual_legal_note;
3283       break;
3284     case CDSGeneProt_field_cds_inference:
3285       f = FeatureFieldNew ();
3286       f->type = Macro_feature_type_cds;
3287       f->field = ValNodeNew (NULL);
3288       f->field->choice = FeatQualChoice_legal_qual;
3289       f->field->data.intvalue = Feat_qual_legal_inference;
3290       break;
3291     case CDSGeneProt_field_codon_start:
3292       f = FeatureFieldNew ();
3293       f->type = Macro_feature_type_cds;
3294       f->field = ValNodeNew (NULL);
3295       f->field->choice = FeatQualChoice_legal_qual;
3296       f->field->data.intvalue = Feat_qual_legal_codon_start;
3297       break;
3298     case CDSGeneProt_field_gene_locus:
3299       f = FeatureFieldNew ();
3300       f->type = Macro_feature_type_gene;
3301       f->field = ValNodeNew (NULL);
3302       f->field->choice = FeatQualChoice_legal_qual;
3303       f->field->data.intvalue = Feat_qual_legal_gene;
3304       break;
3305     case CDSGeneProt_field_gene_description:
3306       f = FeatureFieldNew ();
3307       f->type = Macro_feature_type_gene;
3308       f->field = ValNodeNew (NULL);
3309       f->field->choice = FeatQualChoice_legal_qual;
3310       f->field->data.intvalue = Feat_qual_legal_gene_description;
3311       break;
3312     case CDSGeneProt_field_gene_comment:
3313       f = FeatureFieldNew ();
3314       f->type = Macro_feature_type_gene;
3315       f->field = ValNodeNew (NULL);
3316       f->field->choice = FeatQualChoice_legal_qual;
3317       f->field->data.intvalue = Feat_qual_legal_note;
3318       break;
3319     case CDSGeneProt_field_gene_allele:
3320       f = FeatureFieldNew ();
3321       f->type = Macro_feature_type_gene;
3322       f->field = ValNodeNew (NULL);
3323       f->field->choice = FeatQualChoice_legal_qual;
3324       f->field->data.intvalue = Feat_qual_legal_allele;
3325       break;
3326     case CDSGeneProt_field_gene_maploc:
3327       f = FeatureFieldNew ();
3328       f->type = Macro_feature_type_gene;
3329       f->field = ValNodeNew (NULL);
3330       f->field->choice = FeatQualChoice_legal_qual;
3331       f->field->data.intvalue = Feat_qual_legal_map;
3332       break;
3333     case CDSGeneProt_field_gene_locus_tag:
3334       f = FeatureFieldNew ();
3335       f->type = Macro_feature_type_gene;
3336       f->field = ValNodeNew (NULL);
3337       f->field->choice = FeatQualChoice_legal_qual;
3338       f->field->data.intvalue = Feat_qual_legal_locus_tag;
3339       break;
3340     case CDSGeneProt_field_gene_synonym:
3341       f = FeatureFieldNew ();
3342       f->type = Macro_feature_type_gene;
3343       f->field = ValNodeNew (NULL);
3344       f->field->choice = FeatQualChoice_legal_qual;
3345       f->field->data.intvalue = Feat_qual_legal_synonym;
3346       break;
3347     case CDSGeneProt_field_gene_old_locus_tag:
3348       f = FeatureFieldNew ();
3349       f->type = Macro_feature_type_gene;
3350       f->field = ValNodeNew (NULL);
3351       f->field->choice = FeatQualChoice_legal_qual;
3352       f->field->data.intvalue = Feat_qual_legal_old_locus_tag;
3353       break;
3354     case CDSGeneProt_field_gene_inference:
3355       f = FeatureFieldNew ();
3356       f->type = Macro_feature_type_gene;
3357       f->field = ValNodeNew (NULL);
3358       f->field->choice = FeatQualChoice_legal_qual;
3359       f->field->data.intvalue = Feat_qual_legal_inference;
3360       break;
3361     case CDSGeneProt_field_mrna_product:
3362       f = FeatureFieldNew ();
3363       f->type = Macro_feature_type_mRNA;
3364       f->field = ValNodeNew (NULL);
3365       f->field->choice = FeatQualChoice_legal_qual;
3366       f->field->data.intvalue = Feat_qual_legal_product;
3367       break;
3368     case CDSGeneProt_field_mrna_comment:
3369       f = FeatureFieldNew ();
3370       f->type = Macro_feature_type_mRNA;
3371       f->field = ValNodeNew (NULL);
3372       f->field->choice = FeatQualChoice_legal_qual;
3373       f->field->data.intvalue = Feat_qual_legal_note;
3374       break;
3375     case CDSGeneProt_field_prot_name:
3376       f = FeatureFieldNew ();
3377       f->type = Macro_feature_type_prot;
3378       f->field = ValNodeNew (NULL);
3379       f->field->choice = FeatQualChoice_legal_qual;
3380       f->field->data.intvalue = Feat_qual_legal_product;
3381       break;
3382     case CDSGeneProt_field_prot_description:
3383       f = FeatureFieldNew ();
3384       f->type = Macro_feature_type_prot;
3385       f->field = ValNodeNew (NULL);
3386       f->field->choice = FeatQualChoice_legal_qual;
3387       f->field->data.intvalue = Feat_qual_legal_description;
3388       break;
3389     case CDSGeneProt_field_prot_ec_number:
3390       f = FeatureFieldNew ();
3391       f->type = Macro_feature_type_prot;
3392       f->field = ValNodeNew (NULL);
3393       f->field->choice = FeatQualChoice_legal_qual;
3394       f->field->data.intvalue = Feat_qual_legal_ec_number;
3395       break;
3396     case CDSGeneProt_field_prot_activity:
3397       f = FeatureFieldNew ();
3398       f->type = Macro_feature_type_prot;
3399       f->field = ValNodeNew (NULL);
3400       f->field->choice = FeatQualChoice_legal_qual;
3401       f->field->data.intvalue = Feat_qual_legal_activity;
3402       break;
3403     case CDSGeneProt_field_prot_comment:
3404       f = FeatureFieldNew ();
3405       f->type = Macro_feature_type_prot;
3406       f->field = ValNodeNew (NULL);
3407       f->field->choice = FeatQualChoice_legal_qual;
3408       f->field->data.intvalue = Feat_qual_legal_note;
3409       break;
3410     case CDSGeneProt_field_mat_peptide_name:
3411       f = FeatureFieldNew ();
3412       f->type = Macro_feature_type_mat_peptide_aa;
3413       f->field = ValNodeNew (NULL);
3414       f->field->choice = FeatQualChoice_legal_qual;
3415       f->field->data.intvalue = Feat_qual_legal_product;
3416       break;
3417     case CDSGeneProt_field_mat_peptide_description:
3418       f = FeatureFieldNew ();
3419       f->type = Macro_feature_type_mat_peptide_aa;
3420       f->field = ValNodeNew (NULL);
3421       f->field->choice = FeatQualChoice_legal_qual;
3422       f->field->data.intvalue = Feat_qual_legal_description;
3423       break;
3424     case CDSGeneProt_field_mat_peptide_ec_number:
3425       f = FeatureFieldNew ();
3426       f->type = Macro_feature_type_mat_peptide_aa;
3427       f->field = ValNodeNew (NULL);
3428       f->field->choice = FeatQualChoice_legal_qual;
3429       f->field->data.intvalue = Feat_qual_legal_ec_number;
3430       break;
3431     case CDSGeneProt_field_mat_peptide_activity:
3432       f = FeatureFieldNew ();
3433       f->type = Macro_feature_type_mat_peptide_aa;
3434       f->field = ValNodeNew (NULL);
3435       f->field->choice = FeatQualChoice_legal_qual;
3436       f->field->data.intvalue = Feat_qual_legal_activity;
3437       break;
3438     case CDSGeneProt_field_mat_peptide_comment:
3439       f = FeatureFieldNew ();
3440       f->type = Macro_feature_type_mat_peptide_aa;
3441       f->field = ValNodeNew (NULL);
3442       f->field->choice = FeatQualChoice_legal_qual;
3443       f->field->data.intvalue = Feat_qual_legal_note;
3444       break;
3445   }
3446   return f;
3447 }
3448 
3449 
CDSGeneProtFieldFromFeatureField(FeatureFieldPtr ffp)3450 static Uint2 CDSGeneProtFieldFromFeatureField (FeatureFieldPtr ffp)
3451 {
3452   Uint2 cds_gene_prot_field = 0;
3453 
3454   if (ffp != NULL && ffp->field != NULL && ffp->field->choice == FeatQualChoice_legal_qual) {
3455     switch (ffp->field->data.intvalue) {
3456       case Feat_qual_legal_note:
3457         switch (ffp->type) {
3458           case Macro_feature_type_cds:
3459             cds_gene_prot_field = CDSGeneProt_field_cds_comment;
3460             break;
3461           case Macro_feature_type_gene:
3462             cds_gene_prot_field = CDSGeneProt_field_gene_comment;
3463             break;
3464           case Macro_feature_type_mRNA:
3465             cds_gene_prot_field = CDSGeneProt_field_mrna_comment;
3466             break;
3467           case Macro_feature_type_prot:
3468             cds_gene_prot_field = CDSGeneProt_field_prot_comment;
3469             break;
3470           case Macro_feature_type_mat_peptide_aa:
3471             cds_gene_prot_field = CDSGeneProt_field_mat_peptide_comment;
3472             break;
3473         }
3474         break;
3475       case Feat_qual_legal_inference:
3476         switch (ffp->type) {
3477           case Macro_feature_type_cds:
3478             cds_gene_prot_field = CDSGeneProt_field_cds_inference;
3479             break;
3480           case Macro_feature_type_gene:
3481             cds_gene_prot_field = CDSGeneProt_field_gene_inference;
3482             break;
3483         }
3484         break;
3485       case Feat_qual_legal_codon_start:
3486         cds_gene_prot_field = CDSGeneProt_field_codon_start;
3487         break;
3488       case Feat_qual_legal_gene:
3489         cds_gene_prot_field = CDSGeneProt_field_gene_locus;
3490         break;
3491       case Feat_qual_legal_gene_description:
3492         cds_gene_prot_field = CDSGeneProt_field_gene_description;
3493         break;
3494       case Feat_qual_legal_allele:
3495         cds_gene_prot_field = CDSGeneProt_field_gene_allele;
3496         break;
3497       case Feat_qual_legal_map:
3498         cds_gene_prot_field = CDSGeneProt_field_gene_maploc;
3499         break;
3500       case Feat_qual_legal_locus_tag:
3501         cds_gene_prot_field = CDSGeneProt_field_gene_locus_tag;
3502         break;
3503       case Feat_qual_legal_synonym:
3504         cds_gene_prot_field = CDSGeneProt_field_gene_synonym;
3505         break;
3506       case Feat_qual_legal_old_locus_tag:
3507         cds_gene_prot_field = CDSGeneProt_field_gene_old_locus_tag;
3508         break;
3509       case Feat_qual_legal_product:
3510         switch (ffp->type) {
3511           case Macro_feature_type_mRNA:
3512             cds_gene_prot_field = CDSGeneProt_field_mrna_product;
3513             break;
3514           case Macro_feature_type_prot:
3515             cds_gene_prot_field = CDSGeneProt_field_prot_name;
3516             break;
3517           case Macro_feature_type_mat_peptide_aa:
3518             cds_gene_prot_field = CDSGeneProt_field_mat_peptide_name;
3519             break;
3520         }
3521         break;
3522       case Feat_qual_legal_description:
3523         switch (ffp->type) {
3524           case Macro_feature_type_gene:
3525             cds_gene_prot_field = CDSGeneProt_field_gene_description;
3526             break;
3527           case Macro_feature_type_prot:
3528             cds_gene_prot_field = CDSGeneProt_field_prot_description;
3529             break;
3530           case Macro_feature_type_mat_peptide_aa:
3531             cds_gene_prot_field = CDSGeneProt_field_mat_peptide_description;
3532             break;
3533         }
3534         break;
3535       case Feat_qual_legal_ec_number:
3536         switch (ffp->type) {
3537           case Macro_feature_type_prot:
3538             cds_gene_prot_field = CDSGeneProt_field_prot_ec_number;
3539             break;
3540           case Macro_feature_type_mat_peptide_aa:
3541             cds_gene_prot_field = CDSGeneProt_field_mat_peptide_ec_number;
3542             break;
3543         }
3544         break;
3545       case Feat_qual_legal_activity:
3546         switch (ffp->type) {
3547           case Macro_feature_type_prot:
3548             cds_gene_prot_field = CDSGeneProt_field_prot_activity;
3549             break;
3550           case Macro_feature_type_mat_peptide_aa:
3551             cds_gene_prot_field = CDSGeneProt_field_mat_peptide_activity;
3552             break;
3553         }
3554         break;
3555     }
3556   }
3557   return cds_gene_prot_field;
3558 }
3559 
3560 
3561 /* Molinfo fields */
3562 typedef struct moleculetypebiomol {
3563   Int4 molecule_type;
3564   Int4 biomol;
3565   CharPtr name;
3566 } MoleculeTypeBiomolData, PNTR MoleculeTypeBiomolPtr;
3567 
3568 static MoleculeTypeBiomolData moleculetype_biomol[] = {
3569  { Molecule_type_unknown , 0, " " } ,
3570  { Molecule_type_genomic , MOLECULE_TYPE_GENOMIC , "genomic" } ,
3571  { Molecule_type_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "precursor RNA" } ,
3572  { Molecule_type_mRNA , MOLECULE_TYPE_MRNA , "mRNA" } ,
3573  { Molecule_type_rRNA , MOLECULE_TYPE_RRNA , "rRNA" } ,
3574  { Molecule_type_tRNA , MOLECULE_TYPE_TRNA , "tRNA" } ,
3575  { Molecule_type_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "genomic mRNA" } ,
3576  { Molecule_type_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } ,
3577  { Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } ,
3578  { Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } ,
3579  { Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } ,
3580  { Molecule_type_macro_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other-genetic" }
3581 };
3582 
3583 
3584 #define NUM_moleculetype_biomol sizeof (moleculetype_biomol) / sizeof (MoleculeTypeBiomolData)
3585 
BiomolFromMoleculeType(Int4 molecule_type)3586 NLM_EXTERN Int4 BiomolFromMoleculeType (Int4 molecule_type)
3587 {
3588   Int4 i;
3589 
3590   for (i = 0; i < NUM_moleculetype_biomol; i++) {
3591     if (moleculetype_biomol[i].molecule_type == molecule_type) {
3592       return moleculetype_biomol[i].biomol;
3593     }
3594   }
3595   return -1;
3596 }
3597 
3598 
BiomolNameFromBiomol(Int4 biomol)3599 NLM_EXTERN CharPtr BiomolNameFromBiomol (Int4 biomol)
3600 {
3601   Int4 i;
3602 
3603   for (i = 0; i < NUM_moleculetype_biomol; i++) {
3604     if (moleculetype_biomol[i].biomol == biomol) {
3605       return moleculetype_biomol[i].name;
3606     }
3607   }
3608   return NULL;
3609 }
3610 
3611 
BiomolFromBiomolName(CharPtr biomol_name)3612 static Int4 BiomolFromBiomolName (CharPtr biomol_name)
3613 {
3614   Int4 i;
3615 
3616   for (i = 0; i < NUM_moleculetype_biomol; i++) {
3617     if (StringICmp (moleculetype_biomol[i].name, biomol_name) == 0) {
3618       return moleculetype_biomol[i].biomol;
3619     }
3620   }
3621   return -1;
3622 }
3623 
3624 
GetMoleculeTypeList(void)3625 NLM_EXTERN ValNodePtr GetMoleculeTypeList (void)
3626 {
3627   ValNodePtr list = NULL;
3628   Int4 i;
3629 
3630   for (i = 0; i < NUM_moleculetype_biomol; i++) {
3631     ValNodeAddPointer (&list, moleculetype_biomol[i].molecule_type, StringSave (moleculetype_biomol[i].name));
3632   }
3633   return list;
3634 }
3635 
3636 
3637 /* Technique fields */
3638 typedef struct techniquetypetech {
3639   Int4 technique_type;
3640   Int4 tech;
3641   CharPtr name;
3642 } TechniqueTypeTechData, PNTR TechniqueTypeTechPtr;
3643 
3644 static TechniqueTypeTechData techniquetype_tech[] = {
3645  { Technique_type_unknown , MI_TECH_unknown , " " } ,
3646  { Technique_type_standard , MI_TECH_standard , "standard" } ,
3647  { Technique_type_est , MI_TECH_est , "EST" } ,
3648  { Technique_type_sts , MI_TECH_sts , "STS" } ,
3649  { Technique_type_survey , MI_TECH_survey , "survey" } ,
3650  { Technique_type_genetic_map , MI_TECH_genemap , "genetic map" } ,
3651  { Technique_type_physical_map , MI_TECH_physmap , "physical map" } ,
3652  { Technique_type_derived , MI_TECH_derived , "derived" } ,
3653  { Technique_type_concept_trans , MI_TECH_concept_trans , "concept-trans" } ,
3654  { Technique_type_seq_pept , MI_TECH_seq_pept , "seq-pept" } ,
3655  { Technique_type_both , MI_TECH_both , "both" } ,
3656  { Technique_type_seq_pept_overlap , MI_TECH_seq_pept_overlap , "seq-pept-overlap" } ,
3657  { Technique_type_seq_pept_homol , MI_TECH_seq_pept_homol, "seq-pept-homol" } ,
3658  { Technique_type_concept_trans_a, MI_TECH_concept_trans_a, "concept-trans-a" } ,
3659  { Technique_type_htgs_1, MI_TECH_htgs_1, "HTGS-1" } ,
3660  { Technique_type_htgs_2, MI_TECH_htgs_2, "HTGS-2" } ,
3661  { Technique_type_htgs_3, MI_TECH_htgs_3, "HTGS-3" } ,
3662  { Technique_type_fli_cDNA, MI_TECH_fli_cdna, "fli-cDNA" } ,
3663  { Technique_type_htgs_0, MI_TECH_htgs_0, "HTGS-0" } ,
3664  { Technique_type_htc, MI_TECH_htc, "HTC" } ,
3665  { Technique_type_wgs, MI_TECH_wgs, "WGS" } ,
3666  { Technique_type_barcode, MI_TECH_barcode, "BARCODE" } ,
3667  { Technique_type_composite_wgs_htgs, MI_TECH_composite_wgs_htgs, "composite WGS-HTGS" } ,
3668  { Technique_type_tsa, MI_TECH_tsa, "TSA" } ,
3669  { Technique_type_targeted, MI_TECH_targeted, "targeted" } ,
3670  { Technique_type_other, MI_TECH_other, "other" }
3671 };
3672 
3673 
3674 #define NUM_techniquetype_tech sizeof (techniquetype_tech) / sizeof (TechniqueTypeTechData)
3675 
TechFromTechniqueType(Int4 technique_type)3676 NLM_EXTERN Int4 TechFromTechniqueType (Int4 technique_type)
3677 {
3678   Int4 i;
3679 
3680   for (i = 0; i < NUM_techniquetype_tech; i++) {
3681     if (techniquetype_tech[i].technique_type == technique_type) {
3682       return techniquetype_tech[i].tech;
3683     }
3684   }
3685   return -1;
3686 }
3687 
3688 
TechNameFromTech(Int4 tech)3689 NLM_EXTERN CharPtr TechNameFromTech (Int4 tech)
3690 {
3691   Int4 i;
3692 
3693   for (i = 0; i < NUM_techniquetype_tech; i++) {
3694     if (techniquetype_tech[i].tech == tech) {
3695       return techniquetype_tech[i].name;
3696     }
3697   }
3698   return NULL;
3699 }
3700 
3701 
TechFromTechName(CharPtr tech_name)3702 NLM_EXTERN Int4 TechFromTechName (CharPtr tech_name)
3703 {
3704   Int4 i;
3705 
3706   for (i = 0; i < NUM_techniquetype_tech; i++) {
3707     if (StringsAreEquivalent (techniquetype_tech[i].name, tech_name)) {
3708       return techniquetype_tech[i].tech;
3709     }
3710   }
3711   return -1;
3712 }
3713 
3714 
GetTechniqueTypeList(void)3715 NLM_EXTERN ValNodePtr GetTechniqueTypeList (void)
3716 {
3717   ValNodePtr list = NULL;
3718   Int4 i;
3719 
3720   for (i = 0; i < NUM_techniquetype_tech; i++) {
3721     ValNodeAddPointer (&list, techniquetype_tech[i].technique_type, StringSave (techniquetype_tech[i].name));
3722   }
3723   return list;
3724 }
3725 
3726 
3727 /* Completedness fields */
3728 typedef struct completednesstypecompleteness {
3729   Int4 completedness_type;
3730   Int4 completeness;
3731   CharPtr name;
3732 } CompletednessTypeCompletenessData, PNTR CompletednessTypeCompletenessPtr;
3733 
3734 static CompletednessTypeCompletenessData completednesstype_completeness[] = {
3735  { Completedness_type_unknown, 0, " " } ,
3736  { Completedness_type_complete, 1, "complete" } ,
3737  { Completedness_type_partial, 2, "partial" } ,
3738  { Completedness_type_no_left, 3, "no left" } ,
3739  { Completedness_type_no_right, 4, "no right" } ,
3740  { Completedness_type_no_ends, 5, "no ends" } ,
3741  { Completedness_type_has_left, 6, "has left" } ,
3742  { Completedness_type_has_right, 7, "has right" } ,
3743  { Completedness_type_other, 255, "other" }
3744 };
3745 
3746 #define NUM_completednesstype_completeness sizeof (completednesstype_completeness) / sizeof (CompletednessTypeCompletenessData)
3747 
CompletenessFromCompletednessType(Int4 completedness_type)3748 NLM_EXTERN Int4 CompletenessFromCompletednessType (Int4 completedness_type)
3749 {
3750   Int4 i;
3751 
3752   for (i = 0; i < NUM_completednesstype_completeness; i++) {
3753     if (completednesstype_completeness[i].completedness_type == completedness_type) {
3754       return completednesstype_completeness[i].completeness;
3755     }
3756   }
3757   return -1;
3758 }
3759 
3760 
CompletenessNameFromCompleteness(Int4 completeness)3761 NLM_EXTERN CharPtr CompletenessNameFromCompleteness (Int4 completeness)
3762 {
3763   Int4 i;
3764 
3765   for (i = 0; i < NUM_completednesstype_completeness; i++) {
3766     if (completednesstype_completeness[i].completeness == completeness) {
3767       return completednesstype_completeness[i].name;
3768     }
3769   }
3770   return NULL;
3771 }
3772 
3773 
CompletenessFromCompletenessName(CharPtr completeness_name)3774 static Int4 CompletenessFromCompletenessName (CharPtr completeness_name)
3775 {
3776   Int4 i;
3777 
3778   for (i = 0; i < NUM_completednesstype_completeness; i++) {
3779     if (StringICmp (completednesstype_completeness[i].name, completeness_name) == 0) {
3780       return completednesstype_completeness[i].completeness;
3781     }
3782   }
3783   return -1;
3784 }
3785 
3786 
GetCompletednessTypeList(void)3787 NLM_EXTERN ValNodePtr GetCompletednessTypeList (void)
3788 {
3789   ValNodePtr list = NULL;
3790   Int4 i;
3791 
3792   for (i = 0; i < NUM_completednesstype_completeness; i++) {
3793     ValNodeAddPointer (&list, completednesstype_completeness[i].completedness_type, StringSave (completednesstype_completeness[i].name));
3794   }
3795   return list;
3796 }
3797 
3798 
3799 /* Molecule class fields */
3800 typedef struct moleculeclasstypemol {
3801   Int4 moleculeclass_type;
3802   Int4 mol;
3803   CharPtr name;
3804 } MoleculeClassTypeMolData, PNTR MoleculeClassTypeMolPtr;
3805 
3806 static MoleculeClassTypeMolData moleculeclasstype_mol[] = {
3807  { Molecule_class_type_unknown, 0, " " } ,
3808  { Molecule_class_type_dna, MOLECULE_CLASS_DNA, "DNA" } ,
3809  { Molecule_class_type_rna, MOLECULE_CLASS_RNA, "RNA" } ,
3810  { Molecule_class_type_protein, MOLECULE_CLASS_PROTEIN, "protein" } ,
3811  { Molecule_class_type_nucleotide, MOLECULE_CLASS_NUC, "nucleotide" } ,
3812  { Molecule_class_type_other, 255, "other" }
3813 };
3814 
3815 
3816 #define NUM_moleculeclasstype_mol sizeof (moleculeclasstype_mol) / sizeof (MoleculeClassTypeMolData)
3817 
MolFromMoleculeClassType(Int4 moleculeclass_type)3818 NLM_EXTERN Int4 MolFromMoleculeClassType (Int4 moleculeclass_type)
3819 {
3820   Int4 i;
3821 
3822   for (i = 0; i < NUM_moleculeclasstype_mol; i++) {
3823     if (moleculeclasstype_mol[i].moleculeclass_type == moleculeclass_type) {
3824       return moleculeclasstype_mol[i].mol;
3825     }
3826   }
3827   return -1;
3828 }
3829 
3830 
MolNameFromMol(Int4 mol)3831 NLM_EXTERN CharPtr MolNameFromMol (Int4 mol)
3832 {
3833   Int4 i;
3834 
3835   for (i = 0; i < NUM_moleculeclasstype_mol; i++) {
3836     if (moleculeclasstype_mol[i].mol == mol) {
3837       return moleculeclasstype_mol[i].name;
3838     }
3839   }
3840   return NULL;
3841 }
3842 
3843 
MolFromMolName(CharPtr mol_name)3844 static Int4 MolFromMolName (CharPtr mol_name)
3845 {
3846   Int4 i;
3847 
3848   for (i = 0; i < NUM_moleculeclasstype_mol; i++) {
3849     if (StringICmp (moleculeclasstype_mol[i].name, mol_name) == 0) {
3850       return moleculeclasstype_mol[i].mol;
3851     }
3852   }
3853   return -1;
3854 }
3855 
3856 
GetMoleculeClassTypeList(void)3857 NLM_EXTERN ValNodePtr GetMoleculeClassTypeList (void)
3858 {
3859   ValNodePtr list = NULL;
3860   Int4 i;
3861 
3862   for (i = 0; i < NUM_moleculeclasstype_mol; i++) {
3863     ValNodeAddPointer (&list, moleculeclasstype_mol[i].moleculeclass_type, StringSave (moleculeclasstype_mol[i].name));
3864   }
3865   return list;
3866 }
3867 
3868 
3869 /* Topology fields */
3870 typedef struct topologytypetopology {
3871   Int4 topology_type;
3872   Int4 topology;
3873   CharPtr name;
3874 } TopologyTypeTopologyData, PNTR TopologyTypeTopologyPtr;
3875 
3876 static TopologyTypeTopologyData topologytype_topology[] = {
3877  { Topology_type_unknown, 0, " " } ,
3878  { Topology_type_linear, TOPOLOGY_LINEAR, "linear" } ,
3879  { Topology_type_circular, TOPOLOGY_CIRCULAR, "circular" } ,
3880  { Topology_type_tandem, TOPOLOGY_TANDEM, "tandem" } ,
3881  { Topology_type_other, 255, "other" }
3882 };
3883 
3884 #define NUM_topologytype_topology sizeof (topologytype_topology) / sizeof (TopologyTypeTopologyData)
3885 
TopologyFromTopologyType(Int4 topology_type)3886 NLM_EXTERN Int4 TopologyFromTopologyType (Int4 topology_type)
3887 {
3888   Int4 i;
3889 
3890   for (i = 0; i < NUM_topologytype_topology; i++) {
3891     if (topologytype_topology[i].topology_type == topology_type) {
3892       return topologytype_topology[i].topology;
3893     }
3894   }
3895   return -1;
3896 }
3897 
3898 
TopologyNameFromTopology(Int4 topology)3899 NLM_EXTERN CharPtr TopologyNameFromTopology (Int4 topology)
3900 {
3901   Int4 i;
3902 
3903   for (i = 0; i < NUM_topologytype_topology; i++) {
3904     if (topologytype_topology[i].topology == topology) {
3905       return topologytype_topology[i].name;
3906     }
3907   }
3908   return NULL;
3909 }
3910 
3911 
TopologyFromTopologyName(CharPtr topology_name)3912 static Int4 TopologyFromTopologyName (CharPtr topology_name)
3913 {
3914   Int4 i;
3915 
3916   for (i = 0; i < NUM_topologytype_topology; i++) {
3917     if (StringICmp (topologytype_topology[i].name, topology_name) == 0) {
3918       return topologytype_topology[i].topology;
3919     }
3920   }
3921   return -1;
3922 }
3923 
3924 
GetTopologyTypeList(void)3925 NLM_EXTERN ValNodePtr GetTopologyTypeList (void)
3926 {
3927   ValNodePtr list = NULL;
3928   Int4 i;
3929 
3930   for (i = 0; i < NUM_topologytype_topology; i++) {
3931     ValNodeAddPointer (&list, topologytype_topology[i].topology_type, StringSave (topologytype_topology[i].name));
3932   }
3933   return list;
3934 }
3935 
3936 
3937 /* strand fields */
3938 typedef struct strandtypestrand {
3939   Int4 strand_type;
3940   Int4 strand;
3941   CharPtr name;
3942 } StrandTypeStrandData, PNTR StrandTypeStrandPtr;
3943 
3944 static StrandTypeStrandData strandtype_strand[] = {
3945  { Strand_type_unknown, 0, " " } ,
3946  { Strand_type_single, STRANDEDNESS_SINGLE, "single" } ,
3947  { Strand_type_double__, STRANDEDNESS_DOUBLE, "double" } ,
3948  { Strand_type_mixed, 3, "mixed" } ,
3949  { Strand_type_mixed_rev, 4, "mixed-rev" } ,
3950  { Strand_type_other, 255, "other" }
3951 };
3952 
3953 #define NUM_strandtype_strand sizeof (strandtype_strand) / sizeof (StrandTypeStrandData)
3954 
StrandFromStrandType(Int4 strand_type)3955 NLM_EXTERN Int4 StrandFromStrandType (Int4 strand_type)
3956 {
3957   Int4 i;
3958 
3959   for (i = 0; i < NUM_strandtype_strand; i++) {
3960     if (strandtype_strand[i].strand_type == strand_type) {
3961       return strandtype_strand[i].strand;
3962     }
3963   }
3964   return -1;
3965 }
3966 
3967 
StrandNameFromStrand(Int4 strand)3968 NLM_EXTERN CharPtr StrandNameFromStrand (Int4 strand)
3969 {
3970   Int4 i;
3971 
3972   for (i = 0; i < NUM_strandtype_strand; i++) {
3973     if (strandtype_strand[i].strand == strand) {
3974       return strandtype_strand[i].name;
3975     }
3976   }
3977   return NULL;
3978 }
3979 
3980 
StrandFromStrandName(CharPtr strand_name)3981 static Int4 StrandFromStrandName (CharPtr strand_name)
3982 {
3983   Int4 i;
3984 
3985   for (i = 0; i < NUM_strandtype_strand; i++) {
3986     if (StringICmp (strandtype_strand[i].name, strand_name) == 0) {
3987       return strandtype_strand[i].strand;
3988     }
3989   }
3990   return -1;
3991 }
3992 
3993 
GetStrandTypeList(void)3994 NLM_EXTERN ValNodePtr GetStrandTypeList (void)
3995 {
3996   ValNodePtr list = NULL;
3997   Int4 i;
3998 
3999   for (i = 0; i < NUM_strandtype_strand; i++) {
4000     ValNodeAddPointer (&list, strandtype_strand[i].strand_type, StringSave (strandtype_strand[i].name));
4001   }
4002   return list;
4003 }
4004 
4005 
GetSequenceQualValName(ValNodePtr field)4006 static CharPtr GetSequenceQualValName (ValNodePtr field)
4007 {
4008   CharPtr val = NULL;
4009 
4010   if (field == NULL) return NULL;
4011   switch (field->choice) {
4012     case MolinfoField_molecule:
4013       val = BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue));
4014       break;
4015     case MolinfoField_technique:
4016       val = TechNameFromTech (TechFromTechniqueType (field->data.intvalue));
4017       break;
4018     case MolinfoField_completedness:
4019       val = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue));
4020       break;
4021     case MolinfoField_mol_class:
4022       val = MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue));
4023       break;
4024     case MolinfoField_topology:
4025       val = TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue));
4026       break;
4027     case MolinfoField_strand:
4028       val = StrandNameFromStrand (StrandFromStrandType (field->data.intvalue));
4029       break;
4030   }
4031   return val;
4032 }
4033 
4034 
GetSequenceQualName(ValNodePtr field)4035 static CharPtr GetSequenceQualName (ValNodePtr field)
4036 {
4037   CharPtr str = NULL, fieldname = "invalid field", val = "invalid value";
4038   CharPtr fmt = "%s %s";
4039 
4040   if (field == NULL) return NULL;
4041   switch (field->choice) {
4042     case MolinfoField_molecule:
4043       fieldname = "molecule";
4044       val = BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue));
4045       break;
4046     case MolinfoField_technique:
4047       fieldname = "technique";
4048       val = TechNameFromTech (TechFromTechniqueType (field->data.intvalue));
4049       break;
4050     case MolinfoField_completedness:
4051       fieldname = "completeness";
4052       val = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue));
4053       break;
4054     case MolinfoField_mol_class:
4055       fieldname = "class";
4056       val = MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue));
4057       break;
4058     case MolinfoField_topology:
4059       fieldname = "topology";
4060       val = TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue));
4061       break;
4062     case MolinfoField_strand:
4063       fieldname = "strand";
4064       val = StrandNameFromStrand (StrandFromStrandType (field->data.intvalue));
4065       break;
4066   }
4067   if (val == NULL) {
4068     val = "Invalid value";
4069   }
4070   str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fieldname) + StringLen (val)));
4071   sprintf (str, fmt, fieldname, val);
4072   return str;
4073 }
4074 
4075 
MakeSequenceQualFieldTypeList(void)4076 static ValNodePtr MakeSequenceQualFieldTypeList (void)
4077 {
4078   ValNodePtr field_list = NULL;
4079   ValNodePtr field;
4080 
4081   field = ValNodeNew (NULL);
4082   field->choice = MolinfoField_molecule;
4083   field->data.ptrvalue = NULL;
4084   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4085   field = ValNodeNew (NULL);
4086   field->choice = MolinfoField_technique;
4087   field->data.ptrvalue = NULL;
4088   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4089   field = ValNodeNew (NULL);
4090   field->choice = MolinfoField_completedness;
4091   field->data.ptrvalue = NULL;
4092   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4093   field = ValNodeNew (NULL);
4094   field->choice = MolinfoField_mol_class;
4095   field->data.ptrvalue = NULL;
4096   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4097   field = ValNodeNew (NULL);
4098   field->choice = MolinfoField_topology;
4099   field->data.ptrvalue = NULL;
4100   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4101   field = ValNodeNew (NULL);
4102   field->choice = MolinfoField_strand;
4103   field->data.ptrvalue = NULL;
4104   ValNodeAddPointer (&field_list, FieldType_molinfo_field, field);
4105   return field_list;
4106 }
4107 
4108 
/* Creates a molinfo field node of the same kind as `field` whose integer
 * value is looked up from the text `val`.
 *
 * field->choice selects the name lookup (BiomolFromBiomolName,
 * TechFromTechName, ...).  Returns NULL when field is NULL, when the choice
 * is not a handled MolinfoField_* value, when val matches no table name
 * (lookup returns -1), or when the node cannot be allocated.  Otherwise
 * returns a new ValNode with choice == field->choice and data.intvalue set
 * to the lookup result.
 *
 * NOTE(review): the stored value is what the *From*Name helpers return (the
 * second column of the lookup tables), whereas GetSequenceQualValName treats
 * data.intvalue as the first-column "type" value -- confirm which of the two
 * the callers of this function expect.
 */
static ValNodePtr MolinfoFieldFromFieldAndStringValue (ValNodePtr field, CharPtr val)
{
  ValNodePtr mp = NULL;
  Int4       enum_val = -1;

  if (field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
      enum_val = BiomolFromBiomolName (val);
      break;
    case MolinfoField_technique:
      enum_val = TechFromTechName (val);
      break;
    case MolinfoField_completedness:
      enum_val = CompletenessFromCompletenessName (val);
      break;
    case MolinfoField_mol_class:
      enum_val = MolFromMolName (val);
      break;
    case MolinfoField_topology:
      enum_val = TopologyFromTopologyName (val);
      break;
    /* field is a MolinfoField choice, so use the MolinfoField_ constant here
     * like every other case (was MolinfoFieldPair_strand). */
    case MolinfoField_strand:
      enum_val = StrandFromStrandName (val);
      break;
  }

  /* enum_val stays -1 for an unhandled choice or an unknown name */
  if (enum_val > -1) {
    mp = ValNodeNew (NULL);
    if (mp != NULL) {
      mp->choice = field->choice;
      mp->data.intvalue = enum_val;
    }
  }
  return mp;
}
4170 
4171 
4172 /* bond types */
4173 typedef struct bondtype {
4174   Int4 macro_bond_type;
4175   Int4 asn1_bond_type;
4176   CharPtr name;
4177 } BondTypeData, PNTR BondTypePtr;
4178 
4179 static BondTypeData bond_type[] = {
4180  { Bond_type_disulfide, 1, "Disulfide" } ,
4181  { Bond_type_thioester, 2, "Thioester" } ,
4182  { Bond_type_crosslink, 3, "Crosslink" } ,
4183  { Bond_type_thioether, 4, "Thioether" } ,
4184  { Bond_type_other, 255, "Other" }
4185 };
4186 
4187 #define NUM_bond_type sizeof (bond_type) / sizeof (BondTypeData)
4188 
Asn1BondTypeFromMacroBondType(Int4 macro_bond_type)4189 NLM_EXTERN Int4 Asn1BondTypeFromMacroBondType (Int4 macro_bond_type)
4190 {
4191   Int4 i;
4192 
4193   for (i = 0; i < NUM_bond_type; i++) {
4194     if (bond_type[i].macro_bond_type == macro_bond_type) {
4195       return bond_type[i].asn1_bond_type;
4196     }
4197   }
4198   return -1;
4199 }
4200 
4201 
MacroBondTypeFromAsn1BondType(Int4 asn1_bond_type)4202 NLM_EXTERN Int4 MacroBondTypeFromAsn1BondType (Int4 asn1_bond_type)
4203 {
4204   Int4 i;
4205 
4206   for (i = 0; i < NUM_bond_type; i++) {
4207     if (bond_type[i].asn1_bond_type == asn1_bond_type) {
4208       return bond_type[i].macro_bond_type;
4209     }
4210   }
4211   return -1;
4212 }
4213 
4214 
GetMacroBondTypeName(Int4 macro_bond_type)4215 NLM_EXTERN CharPtr GetMacroBondTypeName (Int4 macro_bond_type)
4216 {
4217   Int4 i;
4218 
4219   for (i = 0; i < NUM_bond_type; i++) {
4220     if (bond_type[i].macro_bond_type == macro_bond_type) {
4221       return bond_type[i].name;
4222     }
4223   }
4224   return NULL;
4225 }
4226 
4227 
GetBondTypeList(void)4228 NLM_EXTERN ValNodePtr GetBondTypeList (void)
4229 {
4230   ValNodePtr list = NULL;
4231   Int4 i;
4232 
4233   for (i = 0; i < NUM_bond_type; i++) {
4234     ValNodeAddPointer (&list, bond_type[i].macro_bond_type, StringSave (bond_type[i].name));
4235   }
4236   return list;
4237 }
4238 
4239 
4240 /* site types */
4241 typedef struct sitetype {
4242   Int4 macro_site_type;
4243   Int4 asn1_site_type;
4244   CharPtr name;
4245 } SiteTypeData, PNTR SiteTypePtr;
4246 
4247 static SiteTypeData site_type[] = {
4248   {Site_type_active, 1, "Active"},
4249   {Site_type_binding, 2, "Binding"},
4250   {Site_type_cleavage, 3, "Cleavage"},
4251   {Site_type_inhibit, 4, "Inhibit"},
4252   {Site_type_modified, 5, "Modified"},
4253   {Site_type_glycosylation, 6, "Glycosylation"},
4254   {Site_type_myristoylation, 7, "Myristoylation"},
4255   {Site_type_mutagenized, 8, "Mutagenized"},
4256   {Site_type_metal_binding, 9, "Metal-binding"},
4257   {Site_type_phosphorylation, 10, "Phosphorylation"},
4258   {Site_type_acetylation, 11, "Acetylation"},
4259   {Site_type_amidation, 12, "Amidation"},
4260   {Site_type_methylation, 13, "Methylation"},
4261   {Site_type_hydroxylation, 14, "Hydroxylation"},
4262   {Site_type_sulfatation, 15, "Sulfatation"},
4263   {Site_type_oxidative_deamination, 16, "Oxidative-deamination"},
4264   {Site_type_pyrrolidone_carboxylic_acid, 17, "Pyrrolidone-carboxylic-acid"},
4265   {Site_type_gamma_carboxyglutamic_acid, 18, "Gamma-carboxyglutamic-acid"},
4266   {Site_type_blocked, 19, "Blocked"},
4267   {Site_type_lipid_binding, 20, "Lipid-binding"},
4268   {Site_type_np_binding, 21, "np-binding"},
4269   {Site_type_dna_binding, 22, "DNA-binding"},
4270   {Site_type_signal_peptide, 23, "Signal-peptide"},
4271   {Site_type_transit_peptide, 24, "Transit-peptide"},
4272   {Site_type_transmembrane_region, 25, "Transmembrane-region"},
4273   {Site_type_nitrosylation, 26, "Nitrosylation"},
4274   {Site_type_other, 255, "Other"},
4275 };
4276 
4277 
4278 #define NUM_site_type sizeof (site_type) / sizeof (SiteTypeData)
4279 
Asn1SiteTypeFromMacroSiteType(Int4 macro_site_type)4280 NLM_EXTERN Int4 Asn1SiteTypeFromMacroSiteType (Int4 macro_site_type)
4281 {
4282   Int4 i;
4283 
4284   for (i = 0; i < NUM_site_type; i++) {
4285     if (site_type[i].macro_site_type == macro_site_type) {
4286       return site_type[i].asn1_site_type;
4287     }
4288   }
4289   return -1;
4290 }
4291 
4292 
MacroSiteTypeFromAsn1SiteType(Int4 asn1_site_type)4293 NLM_EXTERN Int4 MacroSiteTypeFromAsn1SiteType (Int4 asn1_site_type)
4294 {
4295   Int4 i;
4296 
4297   for (i = 0; i < NUM_site_type; i++) {
4298     if (site_type[i].asn1_site_type == asn1_site_type) {
4299       return site_type[i].macro_site_type;
4300     }
4301   }
4302   return -1;
4303 }
4304 
4305 
GetMacroSiteTypeName(Int4 macro_site_type)4306 NLM_EXTERN CharPtr GetMacroSiteTypeName (Int4 macro_site_type)
4307 {
4308   Int4 i;
4309 
4310   for (i = 0; i < NUM_site_type; i++) {
4311     if (site_type[i].macro_site_type == macro_site_type) {
4312       return site_type[i].name;
4313     }
4314   }
4315   return NULL;
4316 }
4317 
4318 
GetSiteTypeList(void)4319 NLM_EXTERN ValNodePtr GetSiteTypeList (void)
4320 {
4321   ValNodePtr list = NULL;
4322   Int4 i;
4323 
4324   for (i = 0; i < NUM_site_type; i++) {
4325     ValNodeAddPointer (&list, site_type[i].macro_site_type, StringSave (site_type[i].name));
4326   }
4327   return list;
4328 }
4329 
4330 
4331 /* Simple constraints */
/* Report whether a character next to a candidate match prevents that match
 * from being treated as a whole word.
 *
 * ch             - the neighbouring character to classify
 * disallow_slash - when TRUE, '/' is also treated as a word character
 *
 * Returns TRUE for letters, digits, '_' and '-' (and '/' when requested),
 * FALSE for everything else.
 */
static Boolean DisallowCharacter (Char ch, Boolean disallow_slash)
{
  /* The <ctype.h> classifiers are only defined for EOF and values
   * representable as unsigned char; converting first keeps bytes with the
   * high bit set from being passed as negative ints.
   */
  unsigned char uch = (unsigned char) ch;

  if (isalpha (uch) || isdigit (uch) || ch == '_' || ch == '-')
  {
    return TRUE;
  }
  else if (disallow_slash && ch == '/')
  {
    return TRUE;
  }
  else
  {
    return FALSE;
  }
}
4347 
4348 
/* Decide whether the match_len characters at 'found' (inside the string
 * beginning at 'start') stand alone as a whole word.
 *
 * A zero-length match is always accepted.  Otherwise both pointers must be
 * non-NULL, the character just before the match (if the match is not at
 * 'start') must not be a word character per DisallowCharacter, and the
 * character just after the match must be either the terminator or not a
 * word character.  disallow_slash is forwarded to DisallowCharacter.
 */
static Boolean IsWholeWordMatchEx (CharPtr start, CharPtr found, Int4 match_len, Boolean disallow_slash)
{
  Char next_ch;

  if (match_len == 0) {
    return TRUE;
  }
  if (start == NULL || found == NULL) {
    return FALSE;
  }

  /* left boundary: only examined when there is a preceding character */
  if (found != start && DisallowCharacter (*(found - 1), disallow_slash)) {
    return FALSE;
  }

  /* right boundary: end of string always counts as a word break */
  next_ch = *(found + match_len);
  if (next_ch != 0 && DisallowCharacter (next_ch, disallow_slash)) {
    return FALSE;
  }

  return TRUE;
}
4381 
4382 
IsWholeWordMatch(CharPtr start,CharPtr found,Int4 match_len)4383 static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len)
4384 {
4385   return IsWholeWordMatchEx (start, found, match_len, FALSE);
4386 }
4387 
4388 
IsStringConstraintEmpty(StringConstraintPtr scp)4389 NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp)
4390 {
4391   if (scp == NULL) {
4392     return TRUE;
4393   }
4394   if (scp->is_all_caps || scp->is_all_lower || scp->is_all_punct) {
4395     return FALSE;
4396   } else if (scp->match_text == NULL || scp->match_text[0] == 0) {
4397     return TRUE;
4398   } else {
4399     return FALSE;
4400   }
4401 }
4402 
4403 
/* Remove whitespace and/or punctuation characters from a string in place.
 *
 * str         - NUL-terminated buffer to compact; NULL is ignored
 * strip_space - when TRUE, characters for which isspace() is true are dropped
 * strip_punct - when TRUE, characters for which ispunct() is true are dropped
 *
 * Surviving characters are shifted left and the result is re-terminated.
 */
static void StripUnimportantCharacters (CharPtr str, Boolean strip_space, Boolean strip_punct)
{
  CharPtr src, dst;
  unsigned char uch;

  if (str == NULL) {
    return;
  }

  dst = str;
  for (src = str; *src != 0; src++) {
    /* <ctype.h> classifiers are undefined for negative values other than
     * EOF, so classify the byte as unsigned char.
     */
    uch = (unsigned char) *src;
    if ((strip_space && isspace (uch)) || (strip_punct && ispunct (uch))) {
      /* don't copy this character */
      continue;
    }
    if (src > dst) {
      *dst = *src;
    }
    dst++;
  }
  *dst = 0;
}
4427 
4428 
/* Report whether position cp within str can be the start of a word.
 *
 * When cp is the very beginning of str there is no preceding character to
 * inspect, so the caller-supplied is_start is returned.  Otherwise the
 * position is a word start exactly when the preceding character is not
 * alphabetic.
 */
static Boolean IsWholeWordAtStart (CharPtr str, CharPtr cp, Boolean is_start)
{
  if (cp == str) {
    return is_start;
  } else {
    /* cast: isalpha() is undefined for negative char values */
    return !isalpha ((unsigned char) *(cp - 1));
  }
}
4437 
4438 
/* Compare the first n characters of two strings, honouring or ignoring
 * case as requested.  A zero-length comparison is always equal (0) and
 * calls neither comparison routine; otherwise the result comes from
 * StringNCmp (case-sensitive) or StringNICmp (case-insensitive).
 */
static int CaseNCompare (CharPtr str1, CharPtr str2, Int4 n, Boolean case_sensitive)
{
  if (n == 0) {
    return 0;
  }
  if (!case_sensitive) {
    return StringNICmp (str1, str2, n);
  }
  return StringNCmp (str1, str2, n);
}
4449 
4450 
/* Compare the pattern str_match against the text beginning at str, applying
 * the relaxed rules carried by the string constraint scp (ignored/synonym
 * words, optional skipping of whitespace and punctuation, whole-word and
 * location requirements).  The comparison is anchored at str; callers that
 * want a "contains" search call this repeatedly at successive positions.
 *
 * str                - text to test; NULL yields FALSE
 * str_match          - pattern; NULL (or a NULL scp) yields TRUE
 * scp                - constraint supplying ignore_words, ignore_space,
 *                      ignore_punct, case_sensitive, whole_word and
 *                      match_location
 * is_start           - whether the position at str/str_match counts as the
 *                      start of a word
 * p_target_match_len - optional; on a successful match it is incremented by
 *                      the number of characters of str counted as consumed
 *
 * Returns TRUE when the pattern matches under these rules.
 */
static Boolean
AdvancedStringCompare
(CharPtr str,
 CharPtr str_match,
 StringConstraintPtr scp,
 Boolean is_start,
 Int4Ptr p_target_match_len)
{
  CharPtr cp_s, cp_m;
  Boolean match = TRUE, recursive_match = FALSE;
  Boolean word_start_s, word_start_m;
  WordSubstitutionPtr word;
  Int4    len1, len2, init_target_match_len = 0, target_match_len = 0;
  ValNodePtr syn;

  if (str == NULL) {
    return FALSE;
  } else if (scp == NULL || str_match == NULL) {
    return TRUE;
  }

  /* cp_s walks the text, cp_m walks the pattern */
  cp_s = str;
  cp_m = str_match;
  if (p_target_match_len != NULL) {
    /* recorded here but not read again within this function */
    init_target_match_len = *p_target_match_len;
  }

  while (match && *cp_m != 0 && !recursive_match) {
    /* first, check to see if we're skipping synonyms */
    for (word = scp->ignore_words; word != NULL && !recursive_match; word = word->next) {
      len1 = StringLen (word->word);
      if (CaseNCompare(word->word, cp_m, len1, word->case_sensitive) == 0) { /* text match */
        word_start_m = IsWholeWordAtStart (str_match, cp_m, is_start);
        if (!word->whole_word || (!isalpha (*(cp_m + len1)) && word_start_m)) { /* whole word match */
          if (word->synonyms == NULL) {
            /* no synonyms: the word is optional in the pattern, so try the
             * rest of the pattern against the current text position
             */
            if (AdvancedStringCompare (cp_s, cp_m + len1, scp, word_start_m, &target_match_len)) {
              recursive_match = TRUE;
            }
          } else {
            /* try each synonym at the current text position; on a hit,
             * continue past the synonym in the text and past the word in
             * the pattern
             */
            for (syn = word->synonyms; syn != NULL && !recursive_match; syn = syn->next) {
              len2 = StringLen (syn->data.ptrvalue);
              if (CaseNCompare(syn->data.ptrvalue, cp_s, len2, word->case_sensitive) == 0) { /* text match */
                word_start_s = IsWholeWordAtStart (str, cp_s, is_start);
                if (!word->whole_word || (!isalpha (*(cp_s + len2)) && word_start_s)) { /* whole word match */
                  /* NOTE(review): len2 is not added to target_match_len
                   * here; confirm whether the reported match length is
                   * meant to include the synonym text
                   */
                  if (AdvancedStringCompare (cp_s + len2, cp_m + len1, scp, word_start_m && word_start_s, &target_match_len)) {
                    recursive_match = TRUE;
                  }
                }
              }
            }
          }
        }
      }
    }
    if (!recursive_match) {
      /* no substitution applied: compare a single character, or skip an
       * ignorable character on either side
       */
      if (CaseNCompare(cp_m, cp_s, 1, scp->case_sensitive) == 0) {
        cp_m++;
        cp_s++;
        target_match_len++;
      } else if (scp->ignore_space && (isspace (*cp_m) || isspace (*cp_s))) {
        if (isspace (*cp_m)) {
          cp_m++;
        }
        if (isspace (*cp_s)) {
          cp_s++;
          target_match_len++;
        }
      } else if (scp->ignore_punct && (ispunct (*cp_m) || ispunct (*cp_s))) {
        if (ispunct (*cp_m)) {
          cp_m++;
        }
        if (ispunct (*cp_s)) {
          cp_s++;
          target_match_len++;
        }
      } else {
        match = FALSE;
      }
    }
  }

  /* When a recursive call already matched the remainder, its result stands
   * and the tail checks below are skipped.
   */
  if (match && !recursive_match) {
    /* consume trailing ignorable characters on both sides */
    while ((scp->ignore_space && isspace (*cp_s)) || (scp->ignore_punct && ispunct (*cp_s))) {
      cp_s++;
      target_match_len++;
    }
    while ((scp->ignore_space && isspace (*cp_m)) || (scp->ignore_punct && ispunct (*cp_m))) {
      cp_m++;
    }

    if (*cp_m != 0) {
      /* pattern not fully consumed */
      match = FALSE;
    } else if ((scp->match_location == String_location_ends || scp->match_location == String_location_equals) && *cp_s != 0) {
      /* "ends"/"equals" require the text to be exhausted as well */
      match = FALSE;
    } else if (scp->whole_word && (!is_start || isalpha (*cp_s))) {
      /* whole-word: must begin at a word start and not run into a letter */
      match = FALSE;
    }
  }
  if (match && p_target_match_len != NULL) {
    (*p_target_match_len) += target_match_len;
  }

  return match;
}
4555 
4556 
AdvancedStringMatch(CharPtr str,StringConstraintPtr scp)4557 static Boolean AdvancedStringMatch (CharPtr str, StringConstraintPtr scp)
4558 {
4559   CharPtr cp;
4560   Boolean rval = FALSE;
4561 
4562   if (str == NULL) {
4563     rval = FALSE;
4564   } else if (scp == NULL) {
4565     rval = TRUE;
4566   } else if (AdvancedStringCompare (str, scp->match_text, scp, TRUE, NULL)) {
4567     rval = TRUE;
4568   } else if (scp->match_location == String_location_starts || scp->match_location == String_location_equals) {
4569     rval = FALSE;
4570   } else {
4571     cp = str + 1;
4572     while (!rval && *cp != 0) {
4573       if (scp->whole_word) {
4574         while (*cp != 0 && isalpha (*(cp-1))) {
4575           cp++;
4576         }
4577       }
4578       if (*cp != 0) {
4579         if (AdvancedStringCompare (cp, scp->match_text, scp, TRUE, NULL)) {
4580           rval = TRUE;
4581         } else {
4582           cp++;
4583         }
4584       }
4585     }
4586   }
4587   return rval;
4588 }
4589 
TestAdvancedStringMatch(void)4590 static void TestAdvancedStringMatch (void)
4591 {
4592   StringConstraintPtr scp;
4593   CharPtr text = "The quick brown fox jumped over the lazy dog.";
4594   CharPtr summ;
4595 
4596   scp = StringConstraintNew ();
4597   scp->match_location = String_location_contains;
4598   scp->match_text = StringSave ("dog leaped");
4599   scp->ignore_words = WordSubstitutionNew();
4600   scp->ignore_words->word = StringSave ("leap");
4601   ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("jump"));
4602   scp->ignore_words->next = WordSubstitutionNew();
4603   scp->ignore_words->next->word = StringSave ("dog");
4604   ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("fox"));
4605 
4606 
4607   AdvancedStringMatch(text, scp);
4608   summ = SummarizeStringConstraint (scp);
4609   summ = MemFree (summ);
4610   scp = StringConstraintFree (scp);
4611 
4612   scp = StringConstraintNew ();
4613   scp->match_location = String_location_equals;
4614   scp->match_text = StringSave ("A fast beige wolf leaped across a sleepy beagle.");
4615   scp->ignore_words = WordSubstitutionNew();
4616   scp->ignore_words->word = StringSave ("a");
4617   scp->ignore_words->whole_word = TRUE;
4618   ValNodeAddPointer (&scp->ignore_words->synonyms, 0, StringSave ("the"));
4619   scp->ignore_words->next = WordSubstitutionNew();
4620   scp->ignore_words->next->word = StringSave ("fast");
4621   ValNodeAddPointer (&scp->ignore_words->next->synonyms, 0, StringSave ("quick"));
4622   scp->ignore_words->next->next = WordSubstitutionNew();
4623   scp->ignore_words->next->next->word = StringSave ("beige");
4624   ValNodeAddPointer (&scp->ignore_words->next->next->synonyms, 0, StringSave ("brown"));
4625   scp->ignore_words->next->next->next = WordSubstitutionNew();
4626   scp->ignore_words->next->next->next->word = StringSave ("wolf");
4627   ValNodeAddPointer (&scp->ignore_words->next->next->next->synonyms, 0, StringSave ("fox"));
4628   scp->ignore_words->next->next->next->next = WordSubstitutionNew();
4629   scp->ignore_words->next->next->next->next->word = StringSave ("across");
4630   ValNodeAddPointer (&scp->ignore_words->next->next->next->next->synonyms, 0, StringSave ("over"));
4631   scp->ignore_words->next->next->next->next->next = WordSubstitutionNew();
4632   scp->ignore_words->next->next->next->next->next->word = StringSave ("beagle");
4633   ValNodeAddPointer (&scp->ignore_words->next->next->next->next->next->synonyms, 0, StringSave ("dog"));
4634 
4635   AdvancedStringMatch(text, scp);
4636   summ = SummarizeStringConstraint (scp);
4637   summ = MemFree (summ);
4638   scp = StringConstraintFree (scp);
4639 }
4640 
4641 
4642 static const CharPtr kPutative = "putative";
4643 
4644 static CharPtr s_weasels[] = {
4645   "candidate",
4646   "hypothetical",
4647   "novel",
4648   "possible",
4649   "potential",
4650   "predicted",
4651   "probable",
4652   "putative",
4653   "candidate",
4654   "uncharacterized",
4655   "unique",
4656   NULL
4657 };
4658 
/* Skip a single leading weasel word.
 *
 * If str begins (case-insensitively) with an entry of s_weasels that is
 * immediately followed by whitespace, returns a pointer just past that word
 * and all whitespace after it.  Otherwise returns str unchanged.  A NULL
 * str is returned as-is.
 */
static CharPtr SkipOneWeasel (CharPtr str)
{
  Int4 i, len;
  CharPtr cp;

  if (str == NULL) {
    return str;
  }

  for (i = 0; s_weasels[i] != NULL; i++) {
    len = StringLen (s_weasels[i]);
    /* casts: isspace() is undefined for negative char values */
    if (StringNICmp (str, s_weasels[i], len) == 0
        && isspace ((unsigned char) *(str + len))) {
      cp = str + len + 1;
      while (isspace ((unsigned char) *cp)) {
        cp++;
      }
      return cp;
    }
  }
  return str;
}
4677 
4678 
/* Skip every leading weasel word by applying SkipOneWeasel repeatedly
 * until it stops advancing.  Returns the resulting position within str.
 */
static CharPtr SkipWeasel (CharPtr str)
{
  CharPtr prev;
  CharPtr cur = str;

  do {
    prev = cur;
    cur = SkipOneWeasel (prev);
  } while (cur != prev);

  return cur;
}
4690 
4691 
/* Test one string against a string constraint, ignoring the constraint's
 * not_present flag (DoesStringMatchConstraint applies that inversion).
 *
 * str - the text to test
 * scp - the constraint; an empty constraint (per IsStringConstraintEmpty)
 *       matches everything
 *
 * Returns TRUE when str satisfies the constraint.
 *
 * Note: scp->match_text is temporarily replaced by its weasel-skipped
 * position while the function runs and restored before every return after
 * that point, so the constraint is modified during the call.
 */
NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp)
{
  CharPtr pFound;
  Boolean rval = FALSE;
  Char    char_after = 0;
  CharPtr search, pattern, tmp_match;

  if (IsStringConstraintEmpty (scp)) return TRUE;
  if (StringHasNoText (str)) return FALSE;

  if (scp->ignore_weasel) {
    str = SkipWeasel(str);
  }

  /* character-class requirements are checked before any text matching */
  if (scp->is_all_caps && !IsAllCaps(str)) {
    return FALSE;
  }
  if (scp->is_all_lower && !IsAllLowerCase(str)) {
    return FALSE;
  }
  if (scp->is_all_punct && !IsAllPunctuation(str)) {
    return FALSE;
  }
  if (scp->match_text == NULL) {
    /* only character-class requirements were specified, and they passed */
    return TRUE;
  }

  /* remember the real pattern so it can be restored on the way out */
  tmp_match = scp->match_text;
  if (scp->ignore_weasel) {
    scp->match_text = SkipWeasel (scp->match_text);
  }

  /* Word substitutions are handled by AdvancedStringMatch.  The original
   * match_text is restored first, so the weasel-skipped pattern is not
   * what AdvancedStringMatch sees.
   */
  if (scp->match_location != String_location_inlist && scp->ignore_words != NULL) {
    scp->match_text = tmp_match;
    return AdvancedStringMatch(str, scp);
  }

  /* When spaces/punctuation are to be ignored, compare stripped copies of
   * both strings; otherwise compare the originals directly.  The copies
   * are freed at the end of the function.
   */
  if (scp->match_location != String_location_inlist && (scp->ignore_space || scp->ignore_punct)) {
    search = StringSave (str);
    StripUnimportantCharacters (search, scp->ignore_space, scp->ignore_punct);
    pattern = StringSave (scp->match_text);
    StripUnimportantCharacters (pattern, scp->ignore_space, scp->ignore_punct);
  } else {
    search = str;
    pattern = scp->match_text;
  }

  switch (scp->match_location)
  {
    /* pattern may occur anywhere; with whole_word, keep looking until an
     * occurrence with word boundaries on both sides is found
     */
    case String_location_contains:
        if (scp->case_sensitive)
        {
          pFound = StringSearch (search, pattern);
        }
        else
        {
          pFound = StringISearch (search, pattern);
        }
      if (pFound == NULL)
      {
        rval = FALSE;
      }
      else if (scp->whole_word)
      {
        rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
        while (!rval && pFound != NULL)
        {
            if (scp->case_sensitive)
            {
              pFound = StringSearch (pFound + 1, pattern);
            }
            else
            {
              pFound = StringISearch (pFound + 1, pattern);
            }
          if (pFound != NULL)
          {
            rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
          }
        }
      }
      else
      {
        rval = TRUE;
      }
      break;
    /* first occurrence of the pattern must be at the very beginning */
    case String_location_starts:
        if (scp->case_sensitive)
        {
          pFound = StringSearch (search, pattern);
        }
        else
        {
          pFound = StringISearch (search, pattern);
        }
      if (pFound == search)
      {
        if (scp->whole_word)
        {
          rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
        }
        else
        {
          rval = TRUE;
        }
      }
      break;
    /* walk the occurrences until one is followed by the terminator */
    case String_location_ends:
        if (scp->case_sensitive)
        {
          pFound = StringSearch (search, pattern);
        }
        else
        {
          pFound = StringISearch (search, pattern);
        }
      while (pFound != NULL && !rval) {
          char_after = *(pFound + StringLen (pattern));
        if (char_after == 0)
        {
          if (scp->whole_word)
          {
            rval = IsWholeWordMatch (search, pFound, StringLen (pattern));
          }
          else
          {
            rval = TRUE;
          }
          /* stop the search, we're at the end of the string */
          pFound = NULL;
        }
        else
        {
            if (scp->case_sensitive)
            {
              pFound = StringSearch (pFound + 1, pattern);
            }
            else
            {
              pFound = StringISearch (pFound + 1, pattern);
            }
        }
      }
      break;
    /* whole-string comparison */
    case String_location_equals:
      if (scp->case_sensitive)
      {
        if (StringCmp (search, pattern) == 0)
        {
          rval = TRUE;
        }
      }
      else
      {
        if (StringICmp (search, pattern) == 0)
        {
          rval = TRUE;
        }
      }
      break;
    /* roles are reversed here: the pattern is the list and the string
     * under test is looked for inside it as a whole word ('/' also counts
     * as a word character).  For this location search == str, so the
     * StringLen (str) below equals StringLen (search).
     */
    case String_location_inlist:
      if (scp->case_sensitive)
      {
        pFound = StringSearch (pattern, search);
      }
      else
      {
        pFound = StringISearch (pattern, search);
      }
      if (pFound == NULL)
      {
        rval = FALSE;
      }
      else
      {
        rval = IsWholeWordMatchEx (pattern, pFound, StringLen (search), TRUE);
        while (!rval && pFound != NULL)
        {
          if (scp->case_sensitive)
          {
            pFound = StringSearch (pFound + 1, search);
          }
          else
          {
            pFound = StringISearch (pFound + 1, search);
          }
          if (pFound != NULL)
          {
            rval = IsWholeWordMatchEx (pattern, pFound, StringLen (str), TRUE);
          }
        }
      }
      if (!rval) {
        /* look for spans */
        rval = IsStringInSpanInList (search, pattern);
      }
      break;
  }

  /* release the stripped copies, if any were made */
  if (search != str) {
    search = MemFree (search);
  }
  if (pattern != scp->match_text) {
    pattern = MemFree (pattern);
  }
  /* put the caller's pattern pointer back */
  scp->match_text = tmp_match;
    return rval;
}
4900 
4901 
DoesStringMatchConstraint(CharPtr str,StringConstraintPtr scp)4902 NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp)
4903 {
4904   Boolean rval;
4905 
4906   rval = DoesSingleStringMatchConstraint (str, scp);
4907   if (scp != NULL && scp->not_present) {
4908     rval = !rval;
4909   }
4910   return rval;
4911 }
4912 
4913 
DoesStringListMatchConstraint(ValNodePtr list,StringConstraintPtr scp)4914 static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintPtr scp)
4915 {
4916   Int4 len = 1;
4917   CharPtr tmp;
4918   Boolean rval = FALSE;
4919   ValNodePtr vnp;
4920 
4921   if (IsStringConstraintEmpty (scp)) {
4922     return TRUE;
4923   }
4924   if (list == NULL) return FALSE;
4925 
4926   for (vnp = list; vnp != NULL; vnp = vnp->next) {
4927     len += StringLen (vnp->data.ptrvalue) + 2;
4928   }
4929 
4930   tmp = (CharPtr) MemNew (sizeof (Char) * len);
4931   for (vnp = list; vnp != NULL; vnp = vnp->next) {
4932     StringCat (tmp, vnp->data.ptrvalue);
4933     if (vnp->next != NULL) {
4934       StringCat (tmp, "; ");
4935     }
4936   }
4937 
4938   rval = DoesStringMatchConstraint (tmp, scp);
4939   tmp = MemFree (tmp);
4940   return rval;
4941 }
4942 
4943 
ReplaceStringConstraintPortionInString(CharPtr PNTR str,CharPtr replace,StringConstraintPtr scp)4944 NLM_EXTERN Boolean ReplaceStringConstraintPortionInString (CharPtr PNTR str, CharPtr replace, StringConstraintPtr scp)
4945 {
4946   Boolean rval = FALSE;
4947   CharPtr match_start, new_str;
4948   Int4    match_len, front_len;
4949 
4950   if (str == NULL) {
4951     return FALSE;
4952   } else if (*str == NULL) {
4953     if (IsStringConstraintEmpty (scp) || scp->not_present) {
4954       *str = StringSave (replace);
4955       rval = TRUE;
4956     }
4957   } else if (IsStringConstraintEmpty (scp)) {
4958     *str = MemFree (*str);
4959     *str = StringSave (replace);
4960     rval = TRUE;
4961   } else {
4962     switch (scp->match_location)
4963     {
4964       case String_location_equals:
4965       case String_location_inlist:
4966         if (DoesStringMatchConstraint (*str, scp)) {
4967           *str = MemFree (*str);
4968           *str = StringSave (replace);
4969           rval = TRUE;
4970         }
4971         break;
4972       case String_location_starts:
4973         match_len = 0;
4974         if (AdvancedStringCompare (*str, scp->match_text, scp, TRUE, &match_len)) {
4975           new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1));
4976           StringCpy (new_str, replace);
4977           StringCat (new_str, (*str) + match_len);
4978           *str = MemFree (*str);
4979           *str = new_str;
4980           rval = TRUE;
4981         }
4982         break;
4983       case String_location_contains:
4984         match_start = *str;
4985         while (*match_start != 0) {
4986           match_len = 0;
4987           if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str || !isalpha (*(match_start - 1))), &match_len)) {
4988             new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (*str) - match_len + StringLen (replace) + 1));
4989             front_len = match_start - *str;
4990             StringNCpy (new_str, *str, front_len);
4991             StringCat (new_str, replace);
4992             StringCat (new_str, match_start + match_len);
4993             *str = MemFree (*str);
4994             *str = new_str;
4995             match_start = (*str) + front_len + StringLen (replace);
4996             rval = TRUE;
4997           } else {
4998             match_start++;
4999           }
5000         }
5001         break;
5002       case String_location_ends:
5003         match_start = *str;
5004         while (!rval && *match_start != 0) {
5005           match_len = 0;
5006           if (AdvancedStringCompare (match_start, scp->match_text, scp, (match_start == *str), &match_len)
5007               && *(match_start + match_len) == 0) {
5008             new_str = (CharPtr) MemNew (sizeof (Char) * ((match_start - *str) + StringLen (replace) + 1));
5009             StringNCpy (new_str, *str, match_start - *str);
5010             StringCat (new_str, replace);
5011             *str = MemFree (*str);
5012             *str = new_str;
5013             rval = TRUE;
5014           } else {
5015             match_start++;
5016           }
5017         }
5018         break;
5019     }
5020   }
5021   return rval;
5022 }
5023 
5024 
/* Remove from *str the text selected by the string constraint, editing the
 * buffer in place.
 *
 * str - address of the heap string to edit; NULL or a NULL *str is rejected
 * scp - constraint; nothing is removed when it is empty or has not_present
 *       set
 *
 * Matching here uses only match_text, case_sensitive, whole_word and
 * match_location (plain StringSearch/StringISearch and
 * StringCmp/StringICmp); the ignore_* options are not consulted in this
 * function.
 *
 * If something was removed and the remaining string has no text (per
 * StringHasNoText), *str is freed and set to the value returned by MemFree.
 *
 * Returns TRUE when anything was removed.
 */
NLM_EXTERN Boolean RemoveStringConstraintPortionFromString (CharPtr PNTR str, StringConstraintPtr scp)
{
  CharPtr pFound, src, dst, cp;
  Boolean rval = FALSE;
  Int4    match_len;

  if (str == NULL || *str == NULL) return FALSE;

  if (IsStringConstraintEmpty (scp) || scp->not_present) return FALSE;

  if (scp->match_location == String_location_equals) {
    /* whole-string match: truncate to the empty string */
    if (scp->case_sensitive) {
      if (StringCmp (*str, scp->match_text) == 0) {
        rval = TRUE;
      }
    } else {
      if (StringICmp (*str, scp->match_text) == 0) {
        rval = TRUE;
      }
    }
    if (rval == TRUE) {
      **str = 0;
    }
  } else {
    match_len = StringLen (scp->match_text);
    if (scp->case_sensitive) {
      pFound = StringSearch (*str, scp->match_text);
    } else {
      pFound = StringISearch (*str, scp->match_text);
    }
    /* each case below either advances pFound to the next occurrence or
     * sets it to NULL to end the loop
     */
    while (pFound != NULL) {
      switch (scp->match_location) {
        case String_location_contains:
        case String_location_inlist:
          /* inlist always requires a whole-word occurrence; contains only
           * when whole_word is set
           */
          if ((!scp->whole_word && scp->match_location != String_location_inlist)
              || IsWholeWordMatch (*str, pFound, match_len)) {
            /* close the gap by shifting the tail left over the match */
            src = pFound + match_len;
            dst = pFound;
            while (*src != 0) {
              *dst = *src;
              dst++;
              src++;
            }
            *dst = 0;
            rval = TRUE;
            /* search again from the same spot: new text now sits there */
            cp = pFound;
          } else {
            cp = pFound + 1;
          }
          if (scp->case_sensitive) {
            pFound = StringSearch (cp, scp->match_text);
          } else {
            pFound = StringISearch (cp, scp->match_text);
          }
          break;

        case String_location_starts:
          /* only the first occurrence is considered, and only if it is at
           * the very beginning of the string
           */
          if (pFound == *str && (!scp->whole_word || IsWholeWordMatch (*str, pFound, match_len))) {
            src = pFound + match_len;
            dst = pFound;
            while (*src != 0) {
              *dst = *src;
              dst++;
              src++;
            }
            *dst = 0;
            rval = TRUE;
          }
          pFound = NULL;
          break;
        case String_location_ends:
          /* truncate at an occurrence that runs to the end of the string;
           * otherwise keep looking at later occurrences
           */
          if (*(pFound + match_len) == 0 && (!scp->whole_word || IsWholeWordMatch (*str, pFound, match_len))) {
            *pFound = 0;
            rval = TRUE;
            pFound = NULL;
          } else {
              if (scp->case_sensitive)
              {
                pFound = StringSearch (pFound + 1, scp->match_text);
              }
              else
              {
                pFound = StringISearch (pFound + 1, scp->match_text);
              }
          }
          break;
      }
    }
    }
  if (rval && StringHasNoText (*str)) {
    *str = MemFree (*str);
  }
    return rval;
}
5119 
5120 
IsLocationConstraintEmpty(LocationConstraintPtr lcp)5121 NLM_EXTERN Boolean IsLocationConstraintEmpty (LocationConstraintPtr lcp)
5122 {
5123   Boolean rval = TRUE;
5124 
5125   if (lcp == NULL)
5126   {
5127     rval = TRUE;
5128   }
5129   else if (lcp->strand != Strand_constraint_any)
5130   {
5131     rval = FALSE;
5132   }
5133   else if (lcp->seq_type != Seqtype_constraint_any)
5134   {
5135     rval = FALSE;
5136   }
5137   else if (lcp->partial5 != Partial_constraint_either)
5138   {
5139     rval = FALSE;
5140   }
5141   else if (lcp->partial3 != Partial_constraint_either)
5142   {
5143     rval = FALSE;
5144   }
5145   else if (lcp->location_type != Location_type_constraint_any)
5146   {
5147     rval = FALSE;
5148   }
5149   else if (lcp->end5 != NULL || lcp->end3 != NULL)
5150   {
5151     rval = FALSE;
5152   }
5153   return rval;
5154 }
5155 
5156 
DoesStrandMatchConstraint(SeqLocPtr slp,LocationConstraintPtr lcp)5157 static Boolean DoesStrandMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5158 {
5159   Uint2 strand;
5160   Boolean rval = FALSE;
5161 
5162   if (slp == NULL)
5163   {
5164     rval = FALSE;
5165   }
5166   else if (lcp == NULL || lcp->strand == Strand_constraint_any)
5167   {
5168     rval = TRUE;
5169   }
5170   else
5171   {
5172     strand = SeqLocStrand (slp);
5173     if (strand == Seq_strand_minus)
5174     {
5175       if (lcp->strand == Strand_constraint_minus)
5176       {
5177         rval = TRUE;
5178       }
5179       else
5180       {
5181         rval = FALSE;
5182       }
5183     }
5184     else
5185     {
5186       if (lcp->strand == Strand_constraint_plus)
5187       {
5188         rval = TRUE;
5189       }
5190       else
5191       {
5192         rval = FALSE;
5193       }
5194     }
5195   }
5196   return rval;
5197 }
5198 
5199 
DoesBioseqMatchSequenceType(BioseqPtr bsp,Uint2 seq_type)5200 static Boolean DoesBioseqMatchSequenceType (BioseqPtr bsp, Uint2 seq_type)
5201 {
5202   Boolean rval = FALSE;
5203 
5204   if (bsp == NULL) return FALSE;
5205   if (seq_type == Seqtype_constraint_any) return TRUE;
5206 
5207   if (ISA_na (bsp->mol) && seq_type == Seqtype_constraint_nuc)
5208   {
5209     rval = TRUE;
5210   }
5211   else if (ISA_aa (bsp->mol) && seq_type == Seqtype_constraint_prot)
5212   {
5213     rval = TRUE;
5214   }
5215   return rval;
5216 }
5217 
5218 
DoesSequenceTypeMatchContraint(SeqLocPtr slp,LocationConstraintPtr lcp)5219 static Boolean DoesSequenceTypeMatchContraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5220 {
5221   Boolean   rval = FALSE;
5222   BioseqPtr bsp;
5223 
5224   if (slp == NULL)
5225   {
5226     rval = FALSE;
5227   }
5228   else if (lcp == NULL || lcp->seq_type == Seqtype_constraint_any)
5229   {
5230     rval = TRUE;
5231   }
5232   else
5233   {
5234     bsp = BioseqFindFromSeqLoc (slp);
5235     rval = DoesBioseqMatchSequenceType (bsp, lcp->seq_type);
5236   }
5237   return rval;
5238 }
5239 
5240 
DoesLocationMatchPartialnessConstraint(SeqLocPtr slp,LocationConstraintPtr lcp)5241 static Boolean DoesLocationMatchPartialnessConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5242 {
5243   Boolean rval = FALSE;
5244   Boolean partial5, partial3;
5245 
5246   if (slp == NULL)
5247   {
5248     rval = FALSE;
5249   }
5250   else if (lcp == NULL)
5251   {
5252     rval = TRUE;
5253   }
5254   else
5255   {
5256     CheckSeqLocForPartial (slp, &partial5, &partial3);
5257     if (lcp->partial5 == Partial_constraint_partial && !partial5)
5258     {
5259       rval = FALSE;
5260     }
5261     else if (lcp->partial5 == Partial_constraint_complete && partial5)
5262     {
5263       rval = FALSE;
5264     }
5265     else if (lcp->partial3 == Partial_constraint_partial && !partial3)
5266     {
5267       rval = FALSE;
5268     }
5269     else if (lcp->partial3 == Partial_constraint_complete && partial3)
5270     {
5271       rval = FALSE;
5272     }
5273     else
5274     {
5275       rval = TRUE;
5276     }
5277   }
5278   return rval;
5279 }
5280 
5281 
DoesLocationMatchTypeConstraint(SeqLocPtr slp,LocationConstraintPtr lcp)5282 static Boolean DoesLocationMatchTypeConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5283 {
5284   Boolean rval = FALSE, has_null = FALSE;
5285   Int4    num_intervals = 0;
5286   SeqLocPtr slp_tmp = NULL;
5287 
5288   if (slp == NULL)
5289   {
5290     rval = FALSE;
5291   }
5292   else if (lcp->location_type == Location_type_constraint_any)
5293   {
5294     rval = TRUE;
5295   }
5296   else
5297   {
5298     while ((slp_tmp = SeqLocFindNext (slp, slp_tmp)) != NULL) {
5299       if (slp_tmp->choice == SEQLOC_NULL)
5300       {
5301         has_null = TRUE;
5302       }
5303       else if (slp->choice != SEQLOC_EMPTY)
5304       {
5305         num_intervals++;
5306       }
5307     }
5308     if (lcp->location_type == Location_type_constraint_single_interval)
5309     {
5310       if (num_intervals == 1)
5311       {
5312         rval = TRUE;
5313       }
5314     }
5315     else if (lcp->location_type == Location_type_constraint_joined)
5316     {
5317       if (num_intervals > 1 && !has_null)
5318       {
5319         rval = TRUE;
5320       }
5321     }
5322     else if (lcp->location_type == Location_type_constraint_ordered)
5323     {
5324       if (num_intervals > 1 && has_null)
5325       {
5326         rval = TRUE;
5327       }
5328     }
5329   }
5330 
5331   return rval;
5332 }
5333 
5334 
DoesLocationMatchDistanceConstraint(SeqLocPtr slp,LocationConstraintPtr lcp)5335 static Boolean DoesLocationMatchDistanceConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5336 {
5337   Boolean   rval = TRUE;
5338   Uint1     strand;
5339   BioseqPtr bsp = NULL;
5340   Int4      pos;
5341 
5342   if (slp == NULL)
5343   {
5344     return FALSE;
5345   }
5346   else if (lcp->end5 == NULL && lcp->end3 == NULL)
5347   {
5348     return TRUE;
5349   }
5350 
5351   strand = SeqLocStrand (slp);
5352   if (strand == Seq_strand_minus)
5353   {
5354     if (lcp->end5 != NULL)
5355     {
5356       bsp = BioseqFindFromSeqLoc (slp);
5357       if (bsp == NULL)
5358       {
5359         rval = FALSE;
5360       }
5361       else
5362       {
5363         pos = SeqLocStop (slp);
5364 
5365         switch (lcp->end5->choice)
5366         {
5367           case LocationPosConstraint_dist_from_end:
5368             if (bsp->length - pos - 1 != lcp->end5->data.intvalue)
5369             {
5370               rval = FALSE;
5371             }
5372             break;
5373           case LocationPosConstraint_max_dist_from_end:
5374             if (bsp->length - pos - 1 > lcp->end5->data.intvalue)
5375             {
5376               rval = FALSE;
5377             }
5378             break;
5379           case LocationPosConstraint_min_dist_from_end:
5380             if (bsp->length - pos - 1 < lcp->end5->data.intvalue)
5381             {
5382               rval = FALSE;
5383             }
5384             break;
5385         }
5386       }
5387     }
5388     if (lcp->end3 != NULL && rval)
5389     {
5390       pos = SeqLocStart (slp);
5391 
5392       switch (lcp->end3->choice)
5393       {
5394         case LocationPosConstraint_dist_from_end:
5395           if (pos != lcp->end3->data.intvalue)
5396           {
5397             rval = FALSE;
5398           }
5399           break;
5400         case LocationPosConstraint_max_dist_from_end:
5401           if (pos > lcp->end3->data.intvalue)
5402           {
5403             rval = FALSE;
5404           }
5405           break;
5406         case LocationPosConstraint_min_dist_from_end:
5407           if (pos < lcp->end3->data.intvalue)
5408           {
5409             rval = FALSE;
5410           }
5411           break;
5412       }
5413     }
5414   }
5415   else
5416   {
5417     if (lcp->end5 != NULL)
5418     {
5419       pos = SeqLocStart (slp);
5420 
5421       switch (lcp->end5->choice)
5422       {
5423         case LocationPosConstraint_dist_from_end:
5424           if (pos != lcp->end5->data.intvalue)
5425           {
5426             rval = FALSE;
5427           }
5428           break;
5429         case LocationPosConstraint_max_dist_from_end:
5430           if (pos > lcp->end5->data.intvalue)
5431           {
5432             rval = FALSE;
5433           }
5434           break;
5435         case LocationPosConstraint_min_dist_from_end:
5436           if (pos < lcp->end5->data.intvalue)
5437           {
5438             rval = FALSE;
5439           }
5440           break;
5441       }
5442     }
5443     if (lcp->end3 != NULL && rval)
5444     {
5445       bsp = BioseqFindFromSeqLoc (slp);
5446       if (bsp == NULL)
5447       {
5448         rval = FALSE;
5449       }
5450       else
5451       {
5452         pos = SeqLocStop (slp);
5453 
5454         switch (lcp->end3->choice)
5455         {
5456           case LocationPosConstraint_dist_from_end:
5457             if (bsp->length - pos - 1 != lcp->end3->data.intvalue)
5458             {
5459               rval = FALSE;
5460             }
5461             break;
5462           case LocationPosConstraint_max_dist_from_end:
5463             if (bsp->length - pos - 1 > lcp->end3->data.intvalue)
5464             {
5465               rval = FALSE;
5466             }
5467             break;
5468           case LocationPosConstraint_min_dist_from_end:
5469             if (bsp->length - pos - 1 < lcp->end3->data.intvalue)
5470             {
5471               rval = FALSE;
5472             }
5473             break;
5474         }
5475       }
5476     }
5477   }
5478   return rval;
5479 }
5480 
5481 
DoesLocationMatchConstraint(SeqLocPtr slp,LocationConstraintPtr lcp)5482 static Boolean DoesLocationMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
5483 
5484 {
5485   Boolean rval = FALSE;
5486 
5487   if (slp == NULL)
5488   {
5489     rval = FALSE;
5490   }
5491   else if (IsLocationConstraintEmpty(lcp))
5492   {
5493     rval = TRUE;
5494   }
5495   else if (DoesStrandMatchConstraint (slp, lcp)
5496            && DoesSequenceTypeMatchContraint (slp, lcp)
5497            && DoesLocationMatchPartialnessConstraint (slp, lcp)
5498            && DoesLocationMatchTypeConstraint(slp, lcp)
5499            && DoesLocationMatchDistanceConstraint(slp, lcp))
5500   {
5501     rval = TRUE;
5502   }
5503   return rval;
5504 }
5505 
5506 
DoesFeatureMatchLocationConstraint(SeqFeatPtr sfp,LocationConstraintPtr constraint)5507 static Boolean DoesFeatureMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint)
5508 {
5509   BioseqPtr bsp;
5510   SeqFeatPtr cds;
5511   SeqMgrFeatContext context;
5512   Boolean           rval = TRUE;
5513 
5514   if (sfp == NULL) {
5515     return FALSE;
5516   } else if (IsLocationConstraintEmpty (constraint)) {
5517     return TRUE;
5518   }
5519 
5520   bsp = BioseqFindFromSeqLoc (sfp->location);
5521   if (constraint->strand != Strand_constraint_any) {
5522     if (bsp == NULL) {
5523       rval = FALSE;
5524     } else if (ISA_aa (bsp->mol)) {
5525       cds = SeqMgrGetCDSgivenProduct (bsp, &context);
5526       if (cds == NULL) {
5527         rval = FALSE;
5528       } else if (!DoesStrandMatchConstraint (cds->location, constraint)) {
5529         rval = FALSE;
5530       }
5531     } else {
5532       if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
5533         rval = FALSE;
5534       }
5535     }
5536   }
5537 
5538   if (!DoesBioseqMatchSequenceType (bsp, constraint->seq_type)) {
5539     rval = FALSE;
5540   }
5541 
5542   if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) {
5543     rval = FALSE;
5544   }
5545 
5546   if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) {
5547     rval = FALSE;
5548   }
5549 
5550   if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) {
5551     rval = FALSE;
5552   }
5553   return rval;
5554 }
5555 
5556 
5557 
DoesSeqFeatMatchLocationConstraint(SeqFeatPtr sfp,LocationConstraintPtr constraint)5558 static Boolean DoesSeqFeatMatchLocationConstraint (SeqFeatPtr sfp, LocationConstraintPtr constraint)
5559 {
5560   if (sfp == NULL) {
5561     return FALSE;
5562   } else if (IsLocationConstraintEmpty(constraint)) {
5563     return TRUE;
5564   } else if (!DoesLocationMatchPartialnessConstraint (sfp->location, constraint)) {
5565     return FALSE;
5566   } else if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
5567     return FALSE;
5568   } else if (!DoesLocationMatchTypeConstraint (sfp->location, constraint)) {
5569     return FALSE;
5570   } else if (!DoesLocationMatchDistanceConstraint(sfp->location, constraint)) {
5571     return FALSE;
5572   } else {
5573     return TRUE;
5574   }
5575 }
5576 
5577 
DoesBioseqMatchLocationConstraint(BioseqPtr bsp,LocationConstraintPtr constraint)5578 static Boolean DoesBioseqMatchLocationConstraint (BioseqPtr bsp, LocationConstraintPtr constraint)
5579 {
5580   Boolean    at_least_one = FALSE;
5581   Boolean    rval = TRUE;
5582   SeqFeatPtr sfp;
5583   SeqMgrFeatContext context;
5584 
5585   if (bsp == NULL) return FALSE;
5586 
5587   if (IsLocationConstraintEmpty(constraint)) {
5588     return TRUE;
5589   }
5590 
5591   if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) {
5592     return FALSE;
5593   }
5594   if (constraint->strand != Strand_constraint_any
5595       || constraint->partial5 != Partial_constraint_either
5596       || constraint->partial3 != Partial_constraint_either) {
5597     if (ISA_aa (bsp->mol)) {
5598       sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
5599       return DoesSeqFeatMatchLocationConstraint(sfp, constraint);
5600     } else {
5601       at_least_one = FALSE;
5602       for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
5603            sfp != NULL && rval;
5604            sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
5605          rval = DoesSeqFeatMatchLocationConstraint (sfp, constraint);
5606          at_least_one = TRUE;
5607       }
5608       return rval && at_least_one;
5609     }
5610   } else {
5611     return TRUE;
5612   }
5613 
5614 }
5615 
5616 
DoesObjectMatchLocationConstraint(Uint1 choice,Pointer data,LocationConstraintPtr constraint)5617 static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint)
5618 {
5619   SeqFeatPtr  sfp;
5620   SeqDescrPtr sdp;
5621   CGPSetPtr   cgp;
5622   BioseqPtr  bsp = NULL;
5623   BioseqSetPtr bssp;
5624   ValNodePtr    vnp;
5625   ObjValNodePtr ovp;
5626 
5627   if (data == NULL) return FALSE;
5628 
5629   if (IsLocationConstraintEmpty(constraint)) {
5630     return TRUE;
5631   }
5632 
5633   if (choice == OBJ_SEQFEAT) {
5634     sfp = (SeqFeatPtr) data;
5635     return DoesFeatureMatchLocationConstraint (sfp, constraint);
5636   } else if (choice == OBJ_SEQDESC) {
5637     sdp = (SeqDescrPtr) data;
5638     if (sdp->extended != 0) {
5639       ovp = (ObjValNodePtr) sdp;
5640       if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
5641         bssp = (BioseqSetPtr) ovp->idx.parentptr;
5642         if (bssp != NULL && bssp->seq_set != NULL && IS_Bioseq_set (bssp->seq_set)) {
5643           bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue;
5644         }
5645       } else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
5646         bsp = (BioseqPtr) ovp->idx.parentptr;
5647       }
5648     }
5649     return DoesBioseqMatchLocationConstraint(bsp, constraint);
5650   } else if (choice == 0) {
5651     if (constraint->seq_type != Seqtype_constraint_any) {
5652       return FALSE;
5653     }
5654     cgp = (CGPSetPtr) data;
5655     for (vnp = cgp->cds_list; vnp != NULL; vnp = vnp->next) {
5656       if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) {
5657         return TRUE;
5658       }
5659     }
5660     for (vnp = cgp->gene_list; vnp != NULL; vnp = vnp->next) {
5661       if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) {
5662         return TRUE;
5663       }
5664     }
5665     for (vnp = cgp->mrna_list; vnp != NULL; vnp = vnp->next) {
5666       if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) {
5667         return TRUE;
5668       }
5669     }
5670     for (vnp = cgp->prot_list; vnp != NULL; vnp = vnp->next) {
5671       if (DoesFeatureMatchLocationConstraint (vnp->data.ptrvalue, constraint)) {
5672         return TRUE;
5673       }
5674     }
5675     return FALSE;
5676   } else if (choice == OBJ_BIOSEQ) {
5677     return DoesBioseqMatchLocationConstraint((BioseqPtr)data, constraint);
5678   } else {
5679     return FALSE;
5680   }
5681 }
5682 
5683 
IsTextMarkerEmpty(TextMarkerPtr marker)5684 NLM_EXTERN Boolean IsTextMarkerEmpty (TextMarkerPtr marker)
5685 {
5686   CharPtr cp;
5687   Boolean rval = FALSE;
5688 
5689   if (marker == NULL) {
5690     rval = TRUE;
5691   } else if (marker->choice == TextMarker_free_text) {
5692     cp = (CharPtr) marker->data.ptrvalue;
5693     if (cp == NULL || *cp == 0) {
5694       rval = TRUE;
5695     }
5696   }
5697   return rval;
5698 }
5699 
5700 
MakeTextTextMarker(CharPtr text)5701 NLM_EXTERN TextMarkerPtr MakeTextTextMarker (CharPtr text)
5702 {
5703   TextMarkerPtr text_marker = ValNodeNew (NULL);
5704 
5705   text_marker->choice = TextMarker_free_text;
5706   text_marker->data.ptrvalue = StringSave (text);
5707   return text_marker;
5708 }
5709 
5710 
5711 
/* Locates a text marker inside str.
 *
 * str            - string to search; NULL yields NULL.
 * len            - optional out-parameter; when a position is returned it
 *                  receives the length of the matched text.
 * marker         - what to look for (see below).
 * case_sensitive - for free-text markers, selects StringSearch (TRUE) or
 *                  StringISearch (FALSE).
 * whole_word     - for free-text markers, the first hit must also pass
 *                  IsWholeWordMatch; if it does not, NULL is returned
 *                  without looking for later occurrences.
 *
 * Marker handling:
 *   NULL marker, or free text that is NULL/empty - matches the whole of
 *       str: returns str with *len = StringLen (str).
 *   TextMarker_free_text - first occurrence of the marker text.
 *   TextMarker_digits    - first run of characters from "0123456789".
 *   TextMarker_letters   - first run of ASCII letters.
 *   any other choice     - NULL.
 *
 * Returns a pointer into str, or NULL when nothing was found.
 */
static CharPtr FindTextMarker(CharPtr str, Int4Ptr len, TextMarkerPtr marker, Boolean case_sensitive, Boolean whole_word)
{
  CharPtr needle;
  CharPtr hit;
  CharPtr charset;
  Int4    needle_len;
  Int4    skip;

  if (str == NULL) {
    return NULL;
  }

  if (marker == NULL) {
    if (len != NULL) {
      *len = StringLen (str);
    }
    return str;
  }

  if (marker->choice == TextMarker_free_text) {
    needle = (CharPtr) marker->data.ptrvalue;
    if (needle == NULL || needle[0] == 0) {
      /* nothing to search for: the marker spans the entire string */
      if (len != NULL) {
        *len = StringLen (str);
      }
      return str;
    }

    if (case_sensitive) {
      hit = StringSearch (str, needle);
    } else {
      hit = StringISearch (str, needle);
    }
    if (hit == NULL) {
      return NULL;
    }

    needle_len = StringLen (needle);
    if (whole_word && ! IsWholeWordMatch (str, hit, needle_len)) {
      return NULL;
    }
    if (len != NULL) {
      *len = needle_len;
    }
    return hit;
  }

  /* character-class markers share one scan, differing only in the set */
  if (marker->choice == TextMarker_digits) {
    charset = "0123456789";
  } else if (marker->choice == TextMarker_letters) {
    charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  } else {
    return NULL;
  }

  skip = StringCSpn (str, charset);
  if (*(str + skip) == 0) {
    /* reached the terminator without meeting a character from the set */
    return NULL;
  }

  hit = str + skip;
  if (len != NULL) {
    *len = StringSpn (hit, charset);
  }
  return hit;
}
5800 
5801 
5802 static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit);
5803 
5804 /* for parsing and editing */
ApplyTextTransformsToString(CharPtr PNTR str,ValNodePtr transform_list)5805 NLM_EXTERN void ApplyTextTransformsToString (CharPtr PNTR str, ValNodePtr transform_list)
5806 {
5807   CharPtr tmp;
5808 
5809   if (str == NULL || *str == NULL) {
5810     return;
5811   }
5812 
5813   while (transform_list != NULL) {
5814     switch (transform_list->choice) {
5815       case TextTransform_edit:
5816         tmp = ApplyEditToString (*str, transform_list->data.ptrvalue);
5817         *str = MemFree (*str);
5818         *str = tmp;
5819         break;
5820       case TextTransform_caps:
5821         FixCapitalizationInString (str, transform_list->data.intvalue, NULL);
5822         break;
5823       case TextTransform_remove:
5824         RemoveTextPortionFromString (*str, (TextPortionPtr)transform_list->data.ptrvalue);
5825         break;
5826     }
5827     transform_list = transform_list->next;
5828   }
5829 }
5830 
5831 
IsTextPortionEmpty(TextPortionPtr text_portion)5832 static Boolean IsTextPortionEmpty (TextPortionPtr text_portion)
5833 {
5834   if (text_portion == NULL
5835       || (IsTextMarkerEmpty (text_portion->left_marker)
5836           && IsTextMarkerEmpty (text_portion->right_marker))) {
5837     return TRUE;
5838   } else {
5839     return FALSE;
5840   }
5841 }
5842 
5843 
IsTextTransformEmpty(ValNodePtr vnp)5844 NLM_EXTERN Boolean IsTextTransformEmpty (ValNodePtr vnp)
5845 {
5846   Boolean rval = TRUE;
5847   FieldEditPtr edit;
5848 
5849   if (vnp == NULL) {
5850     return TRUE;
5851   }
5852   switch (vnp->choice) {
5853     case TextTransform_edit:
5854       if ((edit = (FieldEditPtr) vnp->data.ptrvalue) != NULL
5855         && edit->find_txt != NULL) {
5856         rval = FALSE;
5857       }
5858       break;
5859     case TextTransform_caps:
5860       if (vnp->data.intvalue > Cap_change_none) {
5861         rval = FALSE;
5862       }
5863       break;
5864     case TextTransform_remove:
5865       if (!IsTextPortionEmpty (vnp->data.ptrvalue)) {
5866         rval = FALSE;
5867       }
5868       break;
5869   }
5870   return rval;
5871 }
5872 
5873 
GetTextPortionFromString(CharPtr str,TextPortionPtr text_portion)5874 NLM_EXTERN CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion)
5875 {
5876   CharPtr portion = NULL;
5877   CharPtr found_start, found_end;
5878   Int4    left_len = 0, right_len = 0, found_len;
5879 
5880   if (StringHasNoText (str)) {
5881     return NULL;
5882   }
5883   if (text_portion == NULL) {
5884     return StringSave (str);
5885   }
5886 
5887   found_start = FindTextMarker(str, &left_len, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word);
5888 
5889   if (found_start == NULL)
5890   {
5891     return NULL;
5892   }
5893 
5894   if (!IsTextMarkerEmpty(text_portion->left_marker))
5895   {
5896     if (text_portion->inside && !text_portion->include_left)
5897     {
5898       found_start += left_len;
5899     }
5900     else if (!text_portion->inside && text_portion->include_left)
5901     {
5902       found_start += left_len;
5903     }
5904   }
5905 
5906   found_end = FindTextMarker (found_start, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word);
5907   if (found_end == NULL) {
5908     return NULL;
5909   }
5910 
5911 
5912   if ((text_portion->inside && text_portion->include_right)
5913       || (!text_portion->inside && !text_portion->include_right)
5914       || IsTextMarkerEmpty(text_portion->right_marker))
5915   {
5916     found_end += right_len;
5917   }
5918 
5919   found_len = found_end - found_start;
5920 
5921   if (found_len > 0)
5922   {
5923     portion = (CharPtr) MemNew (sizeof (Char) * (found_len + 1));
5924     StringNCpy (portion, found_start, found_len);
5925     portion[found_len] = 0;
5926   }
5927   return portion;
5928 }
5929 
5930 
5931 
/* Finds where a text portion begins in str.
 *
 * The left marker is located with FindTextMarker.  If the right marker is
 * not empty it must also be found, searching from the left marker match;
 * otherwise the portion is considered absent.
 *
 * Returns a pointer to the left marker match inside str, or NULL when str
 * or text_portion is NULL or the portion is not present.
 */
static CharPtr FindTextPortionLocationInString (CharPtr str, TextPortionPtr text_portion)
{
  CharPtr start;

  if (str == NULL || text_portion == NULL) return NULL;

  start = FindTextMarker(str, NULL, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word);

  if (start != NULL && !IsTextMarkerEmpty (text_portion->right_marker))
  {
    /* the right marker has to exist somewhere at or after the left one */
    if (FindTextMarker(start, NULL, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word) == NULL)
    {
      start = NULL;
    }
  }
  return start;
}
5950 
5951 
ReplaceStringForParse(CharPtr src_text,TextPortionPtr text_portion)5952 static Boolean ReplaceStringForParse(CharPtr src_text, TextPortionPtr text_portion)
5953 {
5954   CharPtr src, dst;
5955   Int4 right_len;
5956 
5957   if (src_text == NULL || text_portion == NULL) {
5958     return FALSE;
5959   }
5960 
5961   dst = FindTextPortionLocationInString (src_text, text_portion);
5962   if (dst == NULL) return FALSE;
5963   if (IsTextMarkerEmpty (text_portion->right_marker)) {
5964     *dst = 0;
5965   } else {
5966     src = FindTextMarker(dst, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word);
5967     if (src != NULL) {
5968       if (text_portion->include_right) {
5969         src += right_len;
5970       }
5971       while (*src != 0) {
5972         *dst = *src;
5973         dst++;
5974         src++;
5975       }
5976       *dst = 0;
5977     }
5978   }
5979   return TRUE;
5980 }
5981 
5982 
RemoveTextPortionFromString(CharPtr str,TextPortionPtr text_portion)5983 NLM_EXTERN Boolean RemoveTextPortionFromString (CharPtr str, TextPortionPtr text_portion)
5984 {
5985   CharPtr before = NULL, after = NULL, src, dst;
5986   Boolean rval = FALSE;
5987   Int4    left_len, right_len;
5988 
5989   if (str == NULL || text_portion == NULL) {
5990     return FALSE;
5991   }
5992 
5993   if (text_portion->inside) {
5994     rval = ReplaceStringForParse (str, text_portion);
5995   } else {
5996     if ((before = FindTextMarker (str, &left_len, text_portion->left_marker,
5997                                   text_portion->case_sensitive, text_portion->whole_word)) != NULL
5998         && (after = FindTextMarker (before, &right_len, text_portion->right_marker,
5999                                   text_portion->case_sensitive, text_portion->whole_word)) != NULL) {
6000       if (!IsTextMarkerEmpty (text_portion->right_marker)) {
6001         if (text_portion->include_right) {
6002           *after = 0;
6003         } else {
6004           *(after + right_len) = 0;
6005         }
6006         rval = TRUE;
6007       }
6008       if (!IsTextMarkerEmpty (text_portion->left_marker)) {
6009         dst = str;
6010         if (text_portion->include_left) {
6011           src = before + left_len;
6012         } else {
6013           src = before;
6014         }
6015         while (*src != 0) {
6016           *dst = *src;
6017           ++dst;
6018           ++src;
6019         }
6020         *dst = 0;
6021         rval = TRUE;
6022       }
6023     }
6024   }
6025   return rval;
6026 }
6027 
6028 
6029 /* generic functions for setting field values */
SetStringValue(CharPtr PNTR existing_val,CharPtr new_val,Uint2 existing_text)6030 NLM_EXTERN Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text)
6031 {
6032   Boolean rval = FALSE;
6033   Int4 len;
6034   CharPtr tmp;
6035 
6036   if (existing_val == NULL) {
6037     return FALSE;
6038   }
6039 
6040   if (StringHasNoText (*existing_val)) {
6041     *existing_val = MemFree (*existing_val);
6042     *existing_val = StringSave (new_val);
6043     rval = TRUE;
6044   } else {
6045     if (existing_text != ExistingTextOption_replace_old
6046         && (new_val == NULL || *new_val == 0)) {
6047       return FALSE;
6048     }
6049     if (existing_text == ExistingTextOption_replace_old
6050         && StringCmp (*existing_val, new_val) == 0) {
6051       return FALSE;
6052     }
6053 
6054     switch (existing_text) {
6055       case ExistingTextOption_replace_old :
6056         *existing_val = MemFree (*existing_val);
6057         *existing_val = StringSave (new_val);
6058         rval = TRUE;
6059         break;
6060       case ExistingTextOption_append_semi :
6061         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6062         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6063         if (tmp != NULL) {
6064           sprintf (tmp, "%s; %s", *existing_val, new_val);
6065           MemFree (*existing_val);
6066           *existing_val = tmp;
6067           rval = TRUE;
6068         }
6069         break;
6070       case ExistingTextOption_append_space :
6071         len = StringLen (new_val) + StringLen (*existing_val) + 2;
6072         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6073         if (tmp != NULL) {
6074           sprintf (tmp, "%s %s", *existing_val, new_val);
6075           MemFree (*existing_val);
6076           *existing_val = tmp;
6077           rval = TRUE;
6078         }
6079         break;
6080       case ExistingTextOption_append_colon :
6081         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6082         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6083         if (tmp != NULL) {
6084           sprintf (tmp, "%s: %s", *existing_val, new_val);
6085           MemFree (*existing_val);
6086           *existing_val = tmp;
6087           rval = TRUE;
6088         }
6089         break;
6090       case ExistingTextOption_append_comma :
6091         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6092         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6093         if (tmp != NULL) {
6094           sprintf (tmp, "%s, %s", *existing_val, new_val);
6095           MemFree (*existing_val);
6096           *existing_val = tmp;
6097           rval = TRUE;
6098         }
6099         break;
6100       case ExistingTextOption_append_none :
6101         len = StringLen (new_val) + StringLen (*existing_val) + 1;
6102         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6103         if (tmp != NULL) {
6104           sprintf (tmp, "%s%s", *existing_val, new_val);
6105           MemFree (*existing_val);
6106           *existing_val = tmp;
6107           rval = TRUE;
6108         }
6109         break;
6110       case ExistingTextOption_prefix_semi :
6111         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6112         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6113         if (tmp != NULL) {
6114           sprintf (tmp, "%s; %s", new_val, *existing_val);
6115           MemFree (*existing_val);
6116           *existing_val = tmp;
6117           rval = TRUE;
6118         }
6119         break;
6120       case ExistingTextOption_prefix_space :
6121         len = StringLen (new_val) + StringLen (*existing_val) + 2;
6122         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6123         if (tmp != NULL) {
6124           sprintf (tmp, "%s %s", new_val, *existing_val);
6125           MemFree (*existing_val);
6126           *existing_val = tmp;
6127           rval = TRUE;
6128         }
6129         break;
6130       case ExistingTextOption_prefix_colon :
6131         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6132         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6133         if (tmp != NULL) {
6134           sprintf (tmp, "%s: %s", new_val, *existing_val);
6135           MemFree (*existing_val);
6136           *existing_val = tmp;
6137           rval = TRUE;
6138         }
6139         break;
6140       case ExistingTextOption_prefix_comma :
6141         len = StringLen (new_val) + StringLen (*existing_val) + 3;
6142         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6143         if (tmp != NULL) {
6144           sprintf (tmp, "%s, %s", new_val, *existing_val);
6145           MemFree (*existing_val);
6146           *existing_val = tmp;
6147           rval = TRUE;
6148         }
6149         break;
6150       case ExistingTextOption_prefix_none :
6151         len = StringLen (new_val) + StringLen (*existing_val) + 1;
6152         tmp = (CharPtr) MemNew (sizeof (Char) * len);
6153         if (tmp != NULL) {
6154           sprintf (tmp, "%s%s", new_val, *existing_val);
6155           MemFree (*existing_val);
6156           *existing_val = tmp;
6157           rval = TRUE;
6158         }
6159         break;
6160       case ExistingTextOption_leave_old :
6161         rval = FALSE;
6162     }
6163   }
6164   return rval;
6165 }
6166 
6167 
6168 /* NOTE: The following functions, GetTwoFieldSubfield, SetTwoFieldSubfield, and RemoveTwoFieldSubfield,
6169  * all assume that if only one field is present, it is subfield 1.
6170  */
/* Returns a newly allocated copy of one subfield of a "field1:field2"
 * string, split at the first ':'.
 *
 *   subfield 0 - the whole string.
 *   subfield 1 - the text before the first ':' (the whole string when there
 *                is no ':').
 *   subfield 2 - the text after the first ':'; NULL when there is no ':' or
 *                the remainder has no text.
 *
 * Returns NULL when str has no text or subfield is greater than 2.
 */
static CharPtr GetTwoFieldSubfield (CharPtr str, Uint1 subfield)
{
  CharPtr colon;
  CharPtr field;
  Int4    first_len;

  if (StringHasNoText (str) || subfield > 2) {
    return NULL;
  }
  if (subfield == 0) {
    return StringSave (str);
  }

  colon = StringChr (str, ':');
  if (colon == NULL) {
    /* a lone field counts as subfield 1 */
    if (subfield == 1) {
      return StringSave (str);
    }
    return NULL;
  }

  if (subfield == 1) {
    first_len = colon - str;
    field = (CharPtr) MemNew (sizeof (Char) * (first_len + 1));
    StringNCpy (field, str, first_len);
    field[first_len] = 0;
    return field;
  }

  if (StringHasNoText (colon + 1)) {
    return NULL;
  }
  return StringSave (colon + 1);
}
6203 
6204 
/* Builds a newly allocated "field1:field2" string from fields[0] and
 * fields[1].
 *
 *   both without text - NULL
 *   only fields[1]    - ":" followed by fields[1]
 *   only fields[0]    - a copy of fields[0], with no ':'
 *   both present      - fields[0], ':', fields[1]
 *
 * Returns NULL when fields itself is NULL.
 */
static CharPtr MakeValFromTwoFields (CharPtr PNTR fields)
{
  Boolean have_first, have_second;
  CharPtr joined;

  if (fields == NULL) {
    return NULL;
  }

  have_first = !StringHasNoText (fields[0]);
  have_second = !StringHasNoText (fields[1]);

  if (!have_second) {
    if (have_first) {
      return StringSave (fields[0]);
    }
    return NULL;
  }

  if (!have_first) {
    joined = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 2));
    sprintf (joined, ":%s", fields[1]);
    return joined;
  }

  joined = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + 2));
  sprintf (joined, "%s:%s", fields[0], fields[1]);
  return joined;
}
6227 
6228 
RemoveTwoFieldSubfield(CharPtr PNTR existing_val,Uint1 subfield)6229 static Boolean RemoveTwoFieldSubfield (CharPtr PNTR existing_val, Uint1 subfield)
6230 {
6231   Boolean rval = FALSE;
6232   CharPtr fields[2];
6233 
6234   if (existing_val == NULL || StringHasNoText (*existing_val) || subfield > 2) {
6235     return FALSE;
6236   }
6237   if (subfield == 0) {
6238     *existing_val = MemFree (*existing_val);
6239     rval = TRUE;
6240   } else {
6241     fields[0] = GetTwoFieldSubfield (*existing_val, 1);
6242     fields[1] = GetTwoFieldSubfield (*existing_val, 2);
6243     if (!StringHasNoText (fields[subfield - 1])) {
6244       fields[subfield - 1] = MemFree (fields[subfield - 1]);
6245       *existing_val = MemFree (*existing_val);
6246       *existing_val = MakeValFromTwoFields (fields);
6247       rval = TRUE;
6248     }
6249     fields[0] = MemFree (fields[0]);
6250     fields[1] = MemFree (fields[1]);
6251   }
6252   return rval;
6253 }
6254 
6255 
SetTwoFieldSubfield(CharPtr PNTR existing_val,Int4 subfield,CharPtr new_field,Uint2 existing_text)6256 static Boolean SetTwoFieldSubfield (CharPtr PNTR existing_val, Int4 subfield, CharPtr new_field, Uint2 existing_text)
6257 {
6258   Boolean rval = FALSE;
6259   CharPtr fields[2];
6260 
6261   if (existing_val == NULL || subfield > 2 || StringHasNoText (new_field)) {
6262     return FALSE;
6263   }
6264   if (subfield == 0) {
6265     rval = SetStringValue (existing_val, new_field, existing_text);
6266   } else {
6267     fields[0] = GetTwoFieldSubfield (*existing_val, 1);
6268     fields[1] = GetTwoFieldSubfield (*existing_val, 2);
6269     if (SetStringValue (&(fields[subfield - 1]), new_field, existing_text)) {
6270       *existing_val = MemFree (*existing_val);
6271       *existing_val = MakeValFromTwoFields (fields);
6272       rval = TRUE;
6273     }
6274     fields[0] = MemFree (fields[0]);
6275     fields[1] = MemFree (fields[1]);
6276   }
6277   return rval;
6278 }
6279 
6280 
6281 /* NOTE: The following functions, GetThreeFieldSubfield, SetThreeFieldSubfield, and RemoveThreeFieldSubfield
6282  * all assume that if only one field is present, it is subfield 3.  If two fields are present, they are subfields 1 and 3.
6283  */
/* Extracts one piece of a colon-separated value with up to three fields.
 *
 * str:      the combined value.
 * subfield: 0 returns a copy of the whole string;
 *           1 returns the text before the first colon (NULL if no colon);
 *           2 returns the text between the first and second colons
 *             (NULL if fewer than two colons);
 *           any other value returns the text after the last colon, or a
 *             copy of the whole string when there is no colon.
 *
 * Returns a newly allocated string, or NULL when str has no text or the
 * requested subfield is absent.
 */
static CharPtr GetThreeFieldSubfield (CharPtr str, Uint1 subfield)
{
  CharPtr first_colon, second_colon, scan;
  CharPtr result = NULL;
  Int4    colon_count = 0;
  size_t  piece_len;

  if (StringHasNoText (str)) {
    return NULL;
  }

  /* count the separators to know which subfields are present */
  for (scan = StringChr (str, ':'); scan != NULL; scan = StringChr (scan + 1, ':')) {
    colon_count++;
  }

  switch (subfield) {
    case 0:
      result = StringSave (str);
      break;
    case 1:
      if (colon_count > 0) {
        first_colon = StringChr (str, ':');
        piece_len = first_colon - str;
        result = (CharPtr) MemNew (sizeof (Char) * (piece_len + 1));
        StringNCpy (result, str, piece_len);
        result[piece_len] = 0;
      }
      break;
    case 2:
      if (colon_count > 1) {
        first_colon = StringChr (str, ':');
        second_colon = StringChr (first_colon + 1, ':');
        piece_len = second_colon - first_colon - 1;
        result = (CharPtr) MemNew (sizeof (Char) * (piece_len + 1));
        StringNCpy (result, first_colon + 1, piece_len);
        result[piece_len] = 0;
      }
      break;
    default:
      if (colon_count == 0) {
        result = StringSave (str);
      } else {
        result = StringSave (StringRChr (str, ':') + 1);
      }
      break;
  }
  return result;
}
6331 
6332 
/* Builds a colon-separated value from three optional fields.
 *
 * fields: array of three strings; an entry for which StringHasNoText is
 *         TRUE is treated as absent.
 *
 * Output by which fields are present:
 *   none     -> NULL          3 only -> "f3"
 *   2 only   -> ":f2:"        1 only -> "f1:"
 *   2 and 3  -> ":f2:f3"      1 and 3 -> "f1:f3"
 *   1 and 2  -> "f1:f2:"      all    -> "f1:f2:f3"
 *
 * Returns a newly allocated string, or NULL if fields is NULL or every
 * field is absent.
 */
static CharPtr MakeValFromThreeFields (CharPtr PNTR fields)
{
  Int4 i;
  Boolean empty[3];
  CharPtr val = NULL;

  if (fields == NULL) return NULL;

  for (i = 0; i < 3; i++) {
    empty[i] = StringHasNoText (fields[i]);
  }

  if (empty[0] && empty[1] && empty[2]) {
    /* do nothing, value is now empty */
  } else if (empty[0] && empty[1]) {
    val = StringSave (fields[2]);
  } else if (empty[0] && empty[2]) {
    /* ":%s:" writes two colons plus the terminator, so the buffer needs
     * length + 3 bytes (length + 2 overflowed by one byte).
     */
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + 3));
    sprintf (val, ":%s:", fields[1]);
  } else if (empty[1] && empty[2]) {
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + 2));
    sprintf (val, "%s:", fields[0]);
  } else if (empty[0]) {
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[1]) + StringLen (fields[2]) + 3));
    sprintf (val, ":%s:%s", fields[1], fields[2]);
  } else if (empty[1]) {
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[2]) + 3));
    sprintf (val, "%s:%s", fields[0], fields[2]);
  } else if (empty[2]) {
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + 3));
    sprintf (val, "%s:%s:", fields[0], fields[1]);
  } else {
    val = (CharPtr) MemNew (sizeof (Char) * (StringLen (fields[0]) + StringLen (fields[1]) + StringLen (fields[2]) + 3));
    sprintf (val, "%s:%s:%s", fields[0], fields[1], fields[2]);
  }
  return val;
}
6371 
6372 
RemoveThreeFieldSubfield(CharPtr PNTR existing_val,Uint1 subfield)6373 static Boolean RemoveThreeFieldSubfield (CharPtr PNTR existing_val, Uint1 subfield)
6374 {
6375   Int4    i;
6376   CharPtr fields[3];
6377   Boolean rval = FALSE;
6378 
6379   if (existing_val == NULL || subfield > 3 || StringHasNoText (*existing_val)) return FALSE;
6380 
6381   if (subfield == 0) {
6382     *existing_val = MemFree (*existing_val);
6383     rval = TRUE;
6384   } else {
6385     for (i = 0; i < 3; i++) {
6386       fields[i] = GetThreeFieldSubfield (*existing_val, i + 1);
6387     }
6388     if (!StringHasNoText (fields[subfield - 1])) {
6389       fields[subfield - 1] = MemFree (fields[subfield - 1]);
6390       *existing_val = MakeValFromThreeFields (fields);
6391       rval = TRUE;
6392     }
6393     for (i = 0; i < 3; i++) {
6394       fields[i] = MemFree (fields[i]);
6395     }
6396   }
6397   return rval;
6398 }
6399 
6400 
SetThreeFieldSubfield(CharPtr PNTR existing_val,Int4 subfield,CharPtr new_field,Uint2 existing_text)6401 static Boolean SetThreeFieldSubfield (CharPtr PNTR existing_val, Int4 subfield, CharPtr new_field, Uint2 existing_text)
6402 {
6403   Int4    i;
6404   CharPtr fields[3];
6405   Boolean rval = FALSE;
6406 
6407   if (existing_val == NULL || StringHasNoText (new_field) || subfield < 0 || subfield > 3) return FALSE;
6408 
6409   if (subfield == 0) {
6410     rval = SetStringValue (existing_val, new_field, existing_text);
6411   } else {
6412     for (i = 0; i < 3; i++) {
6413       fields[i] = GetThreeFieldSubfield (*existing_val, i + 1);
6414     }
6415     if (SetStringValue (&(fields[subfield - 1]), new_field, existing_text)) {
6416       *existing_val = MemFree (*existing_val);
6417       *existing_val = MakeValFromThreeFields (fields);
6418       rval = TRUE;
6419     }
6420     for (i = 0; i < 3; i++) {
6421       fields[i] = MemFree (fields[i]);
6422     }
6423   }
6424   return rval;
6425 }
6426 
6427 
6428 NLM_EXTERN Boolean
SetStringsInValNodeStringList(ValNodePtr PNTR list,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)6429 SetStringsInValNodeStringList
6430 (ValNodePtr PNTR list,
6431  StringConstraintPtr scp,
6432  CharPtr new_val,
6433  Uint2   existing_text)
6434 {
6435   ValNodePtr vnp;
6436   CharPtr    cp;
6437   Boolean rval = FALSE, found = FALSE;
6438 
6439   if (list == NULL)
6440   {
6441     return FALSE;
6442   }
6443 
6444   if (*list == NULL && (scp == NULL || StringHasNoText (scp->match_text))) {
6445     ValNodeAddPointer (list, 0, StringSave (new_val));
6446     rval = TRUE;
6447   } else if (existing_text == ExistingTextOption_add_qual) {
6448       for (vnp = *list; vnp != NULL; vnp = vnp->next)
6449       {
6450         if (StringCmp (new_val, vnp->data.ptrvalue) == 0) {
6451           found = TRUE;
6452           break;
6453         }
6454       }
6455       if (!found) {
6456         ValNodeAddPointer (list, 0, StringSave (new_val));
6457         rval = TRUE;
6458       }
6459   } else if (existing_text == ExistingTextOption_replace_old) {
6460     found = FALSE;
6461     for (vnp = *list; vnp != NULL; vnp = vnp->next) {
6462       cp = (CharPtr) vnp->data.ptrvalue;
6463       if (DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
6464         rval |= SetStringValue (&cp, new_val, existing_text);
6465         vnp->data.ptrvalue = cp;
6466         found = TRUE;
6467       }
6468     }
6469     if (!found && DoesStringListMatchConstraint (*list, scp)) {
6470       *list = ValNodeFreeData (*list);
6471       vnp = ValNodeNew (NULL);
6472       vnp->data.ptrvalue = StringSave (new_val);
6473       *list = vnp;
6474       rval = TRUE;
6475     }
6476   } else if (existing_text == ExistingTextOption_leave_old) {
6477     rval = FALSE;
6478   } else {
6479     for (vnp = *list; vnp != NULL; vnp = vnp->next)
6480     {
6481       cp = (CharPtr) vnp->data.ptrvalue;
6482       if (DoesStringMatchConstraint (cp, scp)) {
6483         rval |= SetStringValue (&cp, new_val, existing_text);
6484         vnp->data.ptrvalue = cp;
6485       }
6486     }
6487   }
6488   return rval;
6489 }
6490 
6491 
SetStringInGBQualList(GBQualPtr PNTR list,ValNodePtr field,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)6492 NLM_EXTERN Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
6493 {
6494   Boolean rval = FALSE, does_match, any_found = FALSE;
6495   Int4 gbqual, subfield;
6496   CharPtr qual_name = NULL, tmp;
6497   GBQualPtr gbq, last_gbq = NULL;
6498 
6499   if (field == NULL) return FALSE;
6500 
6501   if (field->choice == FeatQualChoice_legal_qual)
6502   {
6503     gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield);
6504     if (gbqual > -1) {
6505       qual_name = ParFlat_GBQual_names [gbqual].name;
6506       if (existing_text == ExistingTextOption_add_qual) {
6507         gbq = GBQualNew ();
6508         gbq->qual = StringSave (qual_name);
6509         gbq->val = StringSave (new_val);
6510         if (last_gbq == NULL) {
6511           *list = gbq;
6512         } else {
6513           last_gbq->next = gbq;
6514         }
6515         rval = TRUE;
6516       } else {
6517         for (gbq = *list; gbq != NULL; gbq = gbq->next) {
6518           if (StringCmp (gbq->qual, qual_name) == 0) {
6519             if (subfield > 0) {
6520               does_match = TRUE;
6521               if (!IsStringConstraintEmpty (scp)) {
6522                 tmp = GetTwoFieldSubfield (gbq->val, subfield);
6523                 does_match = DoesStringMatchConstraint (tmp, scp);
6524                 tmp = MemFree (tmp);
6525               }
6526               if (does_match) {
6527                 rval |= SetTwoFieldSubfield (&(gbq->val), subfield, new_val, existing_text);
6528               }
6529             } else if (DoesStringMatchConstraint (gbq->val, scp)) {
6530               rval |= SetStringValue (&(gbq->val), new_val, existing_text);
6531             }
6532             any_found = TRUE;
6533           }
6534           last_gbq = gbq;
6535         }
6536         if (!rval && (scp == NULL || scp->match_text == NULL || (any_found == FALSE && scp->not_present))) {
6537           gbq = GBQualNew ();
6538           gbq->qual = StringSave (qual_name);
6539           gbq->val = StringSave (new_val);
6540           if (last_gbq == NULL) {
6541             *list = gbq;
6542           } else {
6543             last_gbq->next = gbq;
6544           }
6545           rval = TRUE;
6546         }
6547       }
6548     }
6549   } else if (field->choice == FeatQualChoice_illegal_qual) {
6550     for (gbq = *list; gbq != NULL; gbq = gbq->next) {
6551       if (DoesStringMatchConstraint (gbq->qual, field->data.ptrvalue)
6552           && DoesStringMatchConstraint (gbq->val, scp)) {
6553         rval |= SetStringValue (&(gbq->val), new_val, existing_text);
6554       }
6555     }
6556   }
6557 
6558   return rval;
6559 }
6560 
6561 
SetStringInRNAQualList(RNAQualPtr PNTR list,CharPtr qual_name,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)6562 static Boolean SetStringInRNAQualList (RNAQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
6563 {
6564   Boolean rval = FALSE;
6565   RNAQualPtr rq, last_rq = NULL;
6566 
6567   if (StringHasNoText (qual_name)) {
6568     return FALSE;
6569   }
6570 
6571   if (existing_text == ExistingTextOption_add_qual) {
6572     rq = RNAQualNew ();
6573     rq->qual = StringSave (qual_name);
6574     rq->val = StringSave (new_val);
6575     if (last_rq == NULL) {
6576       *list = rq;
6577     } else {
6578       last_rq->next = rq;
6579     }
6580     rval = TRUE;
6581   } else {
6582     for (rq = *list; rq != NULL; rq = rq->next) {
6583       if (StringCmp (rq->qual, qual_name) == 0 && DoesStringMatchConstraint (rq->val, scp)) {
6584         rval |= SetStringValue (&(rq->val), new_val, existing_text);
6585       }
6586       last_rq = rq;
6587     }
6588     if (!rval && (scp == NULL || scp->match_text == NULL)) {
6589       rq = RNAQualNew ();
6590       rq->qual = StringSave (qual_name);
6591       rq->val = StringSave (new_val);
6592       if (last_rq == NULL) {
6593         *list = rq;
6594       } else {
6595         last_rq->next = rq;
6596       }
6597       rval = TRUE;
6598     }
6599   }
6600 
6601   return rval;
6602 }
6603 
6604 
/* Returns a copy of the value of the first RNA qualifier in the list whose
 * name equals qual_name (StringCmp), whose value has text, and whose value
 * satisfies scp.  Returns NULL if there is no such qualifier.
 */
static CharPtr GetFirstRNAQualMatchName (RNAQualPtr qual, CharPtr qual_name, StringConstraintPtr scp)
{
  RNAQualPtr cur;
  CharPtr    copy;

  for (cur = qual; cur != NULL; cur = cur->next) {
    if (StringCmp (cur->qual, qual_name) != 0) continue;
    if (StringHasNoText (cur->val)) continue;
    if (!DoesStringMatchConstraint (cur->val, scp)) continue;

    copy = StringSave (cur->val);
    if (copy != NULL) {
      return copy;
    }
  }
  return NULL;
}
6618 
6619 
RemoveRNAQualMatch(RNAQualPtr PNTR list,CharPtr qual_name,StringConstraintPtr scp)6620 static Boolean RemoveRNAQualMatch (RNAQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp)
6621 {
6622   RNAQualPtr qual_prev = NULL, qual_next, qual;
6623   Boolean   rval = FALSE;
6624 
6625   if (list == NULL) return FALSE;
6626 
6627   qual = *list;
6628   while (qual != NULL) {
6629     qual_next = qual->next;
6630     if (StringICmp (qual->qual, qual_name) == 0 && DoesStringMatchConstraint (qual->val, scp)) {
6631       if (qual_prev == NULL) {
6632         *list = qual->next;
6633       } else {
6634         qual_prev->next = qual->next;
6635       }
6636       qual->next = NULL;
6637       qual = RNAQualFree (qual);
6638       rval = TRUE;
6639     } else {
6640       qual_prev = qual;
6641     }
6642     qual = qual_next;
6643   }
6644   return rval;
6645 }
6646 
6647 
SetInt2ValueWithString(Int2Ptr val,CharPtr val_str,Uint2 existing_text)6648 static Boolean SetInt2ValueWithString (Int2Ptr val, CharPtr val_str, Uint2 existing_text)
6649 {
6650   Char    num[15];
6651   CharPtr tmp = NULL;
6652   Boolean rval = FALSE;
6653 
6654   if (val == NULL) return FALSE;
6655 
6656   sprintf (num, "%d", *val);
6657   tmp = StringSave (num);
6658   if (SetStringValue (&tmp, val_str, existing_text)
6659       && StringIsAllDigits (tmp)) {
6660     *val = atoi (tmp);
6661     rval = TRUE;
6662   }
6663   tmp = MemFree (tmp);
6664   return rval;
6665 }
6666 
6667 
GetInt2ValueFromString(Int2 val,StringConstraintPtr scp)6668 static CharPtr GetInt2ValueFromString (Int2 val, StringConstraintPtr scp)
6669 {
6670   Char num[15];
6671 
6672   sprintf (num, "%d", val);
6673   if (DoesStringMatchConstraint (num, scp)) {
6674     return StringSave (num);
6675   } else {
6676     return NULL;
6677   }
6678 }
6679 
6680 
SetObjectIdString(ObjectIdPtr oip,CharPtr value,Uint2 existing_text)6681 NLM_EXTERN Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text)
6682 {
6683   Boolean rval = FALSE;
6684   Char    num[15];
6685   CharPtr tmp = NULL;
6686 
6687   if (oip == NULL) {
6688     return FALSE;
6689   }
6690 
6691   if (oip->id > 0) {
6692     sprintf (num, "%d", oip->id);
6693     tmp = StringSave (num);
6694   } else {
6695     tmp = StringSaveNoNull (oip->str);
6696   }
6697   if (SetStringValue (&tmp, value, existing_text)) {
6698     oip->str = MemFree (oip->str);
6699     oip->id = 0;
6700     if (StringIsAllDigits (tmp) && StringLen (tmp) < 8 && *tmp != '0') {
6701       oip->id = atoi (tmp);
6702     } else {
6703       oip->str = tmp;
6704       tmp = NULL;
6705     }
6706     rval = TRUE;
6707   }
6708   tmp = MemFree (tmp);
6709   return rval;
6710 }
6711 
6712 
GetObjectIdString(ObjectIdPtr oip)6713 NLM_EXTERN CharPtr GetObjectIdString (ObjectIdPtr oip)
6714 {
6715   CharPtr rval = NULL;
6716   Char    num[15];
6717 
6718   if (oip == NULL) {
6719     return FALSE;
6720   }
6721 
6722   if (oip->id > 0) {
6723     sprintf (num, "%d", oip->id);
6724     rval = StringSave (num);
6725   } else {
6726     rval = StringSaveNoNull (oip->str);
6727   }
6728   return rval;
6729 }
6730 
6731 
DoesNumberMatchStringConstraint(Int4 num,StringConstraintPtr scp)6732 static Boolean DoesNumberMatchStringConstraint (Int4 num, StringConstraintPtr scp)
6733 {
6734   Char tmp[15];
6735 
6736   if (IsStringConstraintEmpty (scp)) {
6737     return TRUE;
6738   }
6739   sprintf (tmp, "%d", num);
6740   return DoesStringMatchConstraint(tmp, scp);
6741 }
6742 
6743 
DoesObjectIdMatchStringConstraint(ObjectIdPtr oip,StringConstraintPtr scp)6744 static Boolean DoesObjectIdMatchStringConstraint (ObjectIdPtr oip, StringConstraintPtr scp)
6745 {
6746   Boolean rval = FALSE;
6747 
6748   if (oip == NULL) {
6749     return FALSE;
6750   } else if (IsStringConstraintEmpty (scp)) {
6751     return TRUE;
6752   } else if (oip->id > 0) {
6753     rval = DoesNumberMatchStringConstraint (oip->id, scp);
6754   } else {
6755     rval = DoesStringMatchConstraint (oip->str, scp);
6756   }
6757   return rval;
6758 }
6759 
6760 
6761 /* generic functions for getting string values */
GetDbtagStringLen(DbtagPtr db_tag)6762 static Int4 GetDbtagStringLen (DbtagPtr db_tag)
6763 {
6764   Int4 len;
6765 
6766   if (db_tag == NULL)
6767   {
6768     return 0;
6769   }
6770 
6771   len = StringLen (db_tag->db) + 2;
6772   if (db_tag->tag != NULL)
6773   {
6774     if (db_tag->tag->str != NULL)
6775     {
6776       len += StringLen (db_tag->tag->str);
6777     }
6778     else
6779     {
6780       len += 10;
6781     }
6782   }
6783   return len;
6784 }
6785 
6786 
GetDbtagString(DbtagPtr db_tag)6787 NLM_EXTERN CharPtr GetDbtagString (DbtagPtr db_tag)
6788 {
6789   Int4    len;
6790   CharPtr str;
6791 
6792   if (db_tag == NULL) {
6793     return NULL;
6794   }
6795 
6796   len = GetDbtagStringLen (db_tag);
6797   if (len == 0) {
6798     return NULL;
6799   }
6800 
6801   str = (CharPtr) MemNew (len * sizeof (Char));
6802   if (str != NULL) {
6803     StringCpy (str, db_tag->db);
6804     StringCat (str, ":");
6805     if (db_tag->tag != NULL) {
6806       if (db_tag->tag->str != NULL) {
6807         StringCat (str, db_tag->tag->str);
6808       } else {
6809         sprintf (str + StringLen (str), "%d", db_tag->tag->id);
6810       }
6811     }
6812   }
6813   return str;
6814 }
6815 
6816 
SetDbtagString(DbtagPtr db_tag,CharPtr value,Uint2 existing_text)6817 NLM_EXTERN Boolean SetDbtagString (DbtagPtr db_tag, CharPtr value, Uint2 existing_text)
6818 {
6819   Boolean rval = FALSE;
6820   CharPtr cp;
6821   Int4    dbxvalid;
6822   CharPtr tmp;
6823   CharPtr twoval;
6824 
6825   if (db_tag == NULL || StringHasNoText (value)) {
6826     return FALSE;
6827   }
6828 
6829   cp = StringChr (value, ':');
6830   if (cp == NULL) {
6831     tmp = StringSave (db_tag->db);
6832     if (SetStringValue (&tmp, value, existing_text)) {
6833       dbxvalid = DbxrefIsValid (tmp, NULL, NULL, NULL, NULL);
6834       if (dbxvalid != 0) {
6835         db_tag->db = MemFree (db_tag->db);
6836         db_tag->db = tmp;
6837         tmp = NULL;
6838         rval = TRUE;
6839       }
6840     }
6841     if (!rval) {
6842       if (db_tag->tag == NULL) {
6843         db_tag->tag = ObjectIdNew();
6844       }
6845       rval = SetObjectIdString (db_tag->tag, value, existing_text);
6846     }
6847     tmp = MemFree (tmp);
6848   } else {
6849     twoval = StringSave (value);
6850     cp = StringChr (twoval, ':');
6851     *cp = 0;
6852     cp++;
6853     rval = SetStringValue (&(db_tag->db), twoval, existing_text);
6854     if (db_tag->tag == NULL) {
6855       db_tag->tag = ObjectIdNew ();
6856     }
6857     rval |= SetObjectIdString (db_tag->tag, cp, existing_text);
6858     twoval = MemFree (twoval);
6859   }
6860   return rval;
6861 }
6862 
6863 
SetDbxrefString(ValNodePtr PNTR list,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)6864 static Boolean SetDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
6865 {
6866   ValNodePtr vnp;
6867   Boolean    rval = FALSE, skip;
6868   DbtagPtr   dbtag;
6869   CharPtr    cp;
6870 
6871   if (list == NULL) {
6872     return FALSE;
6873   }
6874 
6875   if (existing_text == ExistingTextOption_add_qual
6876       || (*list == NULL && (scp == NULL || StringHasNoText (scp->match_text)))) {
6877     dbtag = DbtagNew ();
6878     rval = SetDbtagString (dbtag, value, existing_text);
6879     if (rval) {
6880       ValNodeAddPointer (list, 0, dbtag);
6881     } else {
6882       dbtag = DbtagFree (dbtag);
6883     }
6884   } else {
6885     for (vnp = *list; vnp != NULL; vnp = vnp->next) {
6886       skip = FALSE;
6887       if (scp != NULL) {
6888         cp = GetDbtagString (vnp->data.ptrvalue);
6889         if (!DoesStringMatchConstraint (cp, scp)) {
6890           skip = TRUE;
6891         }
6892         cp = MemFree (cp);
6893       }
6894       if (!skip) {
6895         rval |= SetDbtagString (vnp->data.ptrvalue, value, existing_text);
6896       }
6897     }
6898   }
6899   return rval;
6900 }
6901 
6902 
6903 
GetFirstValNodeStringMatch(ValNodePtr vnp,StringConstraintPtr scp)6904 static CharPtr GetFirstValNodeStringMatch (ValNodePtr vnp, StringConstraintPtr scp)
6905 {
6906   CharPtr str = NULL;
6907   while (vnp != NULL && str == NULL) {
6908     if (!StringHasNoText (vnp->data.ptrvalue)
6909         && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
6910       str = StringSave (vnp->data.ptrvalue);
6911     }
6912     vnp = vnp->next;
6913   }
6914   return str;
6915 }
6916 
6917 
RemoveValNodeStringMatch(ValNodePtr PNTR list,StringConstraintPtr scp)6918 NLM_EXTERN Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp)
6919 {
6920   ValNodePtr vnp_prev = NULL, vnp_next, vnp;
6921   Boolean    rval = FALSE;
6922 
6923   if (list == NULL) return FALSE;
6924   vnp = *list;
6925   while (vnp != NULL) {
6926     vnp_next = vnp->next;
6927     if (!StringHasNoText (vnp->data.ptrvalue)
6928         && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
6929       if (vnp_prev == NULL) {
6930         *list = vnp->next;
6931       } else {
6932         vnp_prev->next = vnp->next;
6933       }
6934       vnp->next = NULL;
6935       vnp = ValNodeFreeData (vnp);
6936       rval = TRUE;
6937     } else {
6938       vnp_prev = vnp;
6939     }
6940     vnp = vnp_next;
6941   }
6942   return rval;
6943 }
6944 
6945 
GetFirstGBQualMatch(GBQualPtr qual,CharPtr qual_name,Int4 subfield,StringConstraintPtr scp)6946 NLM_EXTERN CharPtr GetFirstGBQualMatch (GBQualPtr qual, CharPtr qual_name, Int4 subfield, StringConstraintPtr scp)
6947 {
6948   CharPtr str = NULL;
6949   while (qual != NULL && str == NULL) {
6950     if (StringICmp (qual->qual, qual_name) == 0) {
6951       str = GetTwoFieldSubfield (qual->val, subfield);
6952       if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
6953         str = MemFree (str);
6954       }
6955     }
6956     qual = qual->next;
6957   }
6958   return str;
6959 }
6960 
6961 
GetFirstGBQualMatchConstraintName(GBQualPtr qual,StringConstraintPtr qual_name,StringConstraintPtr scp)6962 static CharPtr GetFirstGBQualMatchConstraintName (GBQualPtr qual, StringConstraintPtr qual_name, StringConstraintPtr scp)
6963 {
6964   CharPtr str = NULL;
6965   while (qual != NULL && str == NULL) {
6966     if (DoesStringMatchConstraint (qual->qual, qual_name)
6967         &&!StringHasNoText (qual->val)
6968         && DoesStringMatchConstraint (qual->val, scp)) {
6969       str = StringSave (qual->val);
6970     }
6971     qual = qual->next;
6972   }
6973   return str;
6974 }
6975 
6976 
RemoveGBQualMatch(GBQualPtr PNTR list,CharPtr qual_name,Int4 subfield,StringConstraintPtr scp)6977 NLM_EXTERN Boolean RemoveGBQualMatch (GBQualPtr PNTR list, CharPtr qual_name, Int4 subfield, StringConstraintPtr scp)
6978 {
6979   GBQualPtr qual_prev = NULL, qual_next, qual;
6980   CharPtr   tmp;
6981   Boolean   rval = FALSE, does_match, do_remove;
6982 
6983   if (list == NULL) return FALSE;
6984 
6985   qual = *list;
6986   while (qual != NULL) {
6987     qual_next = qual->next;
6988     do_remove = FALSE;
6989     if (StringICmp (qual->qual, qual_name) == 0) {
6990       if (subfield > 0) {
6991         does_match = TRUE;
6992         if (!IsStringConstraintEmpty (scp)) {
6993           tmp = GetTwoFieldSubfield (qual->val, subfield);
6994           does_match = DoesStringMatchConstraint (tmp, scp);
6995           tmp = MemFree (tmp);
6996         }
6997         if (RemoveTwoFieldSubfield (&(qual->val), subfield)) {
6998           rval = TRUE;
6999           if (StringHasNoText (qual->val)) {
7000             do_remove = TRUE;
7001           }
7002         }
7003       } else if (DoesStringMatchConstraint (qual->val, scp)) {
7004         do_remove = TRUE;
7005       }
7006     }
7007     if (do_remove) {
7008       if (qual_prev == NULL) {
7009         *list = qual->next;
7010       } else {
7011         qual_prev->next = qual->next;
7012       }
7013       qual->next = NULL;
7014       qual = GBQualFree (qual);
7015       rval = TRUE;
7016     } else {
7017       qual_prev = qual;
7018     }
7019     qual = qual_next;
7020   }
7021   return rval;
7022 }
7023 
7024 
RemoveGBQualMatchConstraintName(GBQualPtr PNTR list,StringConstraintPtr qual_name,StringConstraintPtr scp)7025 static Boolean RemoveGBQualMatchConstraintName (GBQualPtr PNTR list, StringConstraintPtr qual_name, StringConstraintPtr scp)
7026 {
7027   GBQualPtr qual_prev = NULL, qual_next, qual;
7028   Boolean   rval = FALSE;
7029 
7030   if (list == NULL) return FALSE;
7031   qual = *list;
7032   while (qual != NULL) {
7033     qual_next = qual->next;
7034     if (DoesStringMatchConstraint (qual->qual, qual_name)
7035         && !StringHasNoText (qual->val)
7036         && DoesStringMatchConstraint (qual->val, scp)) {
7037       if (qual_prev == NULL) {
7038         *list = qual->next;
7039       } else {
7040         qual_prev->next = qual->next;
7041       }
7042       qual->next = NULL;
7043       qual = GBQualFree (qual);
7044       rval = TRUE;
7045     } else {
7046       qual_prev = qual;
7047     }
7048     qual = qual_next;
7049   }
7050   return rval;
7051 }
7052 
7053 
GetDbxrefString(ValNodePtr list,StringConstraintPtr scp)7054 static CharPtr GetDbxrefString (ValNodePtr list, StringConstraintPtr scp)
7055 {
7056   ValNodePtr vnp;
7057   Int4       len = 0;
7058   CharPtr    str = NULL, cp;
7059 
7060   if (list == NULL) {
7061     return NULL;
7062   }
7063 
7064   for (vnp = list; vnp != NULL; vnp = vnp->next) {
7065     cp = GetDbtagString (vnp->data.ptrvalue);
7066     if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
7067       len += StringLen (cp) + 1;
7068     }
7069     cp = MemFree (cp);
7070   }
7071 
7072   if (len == 0) {
7073     return NULL;
7074   }
7075 
7076   str = (CharPtr) MemNew ((len + 1) * sizeof (Char));
7077   if (str != NULL) {
7078     for (vnp = list; vnp != NULL; vnp = vnp->next) {
7079       cp = GetDbtagString (vnp->data.ptrvalue);
7080       if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
7081         StringCat (str, cp);
7082         StringCat (str, ";");
7083       }
7084       cp = MemFree (cp);
7085     }
7086   }
7087   if (StringLen (str) >1) {
7088     /* remove final semicolon */
7089     str [StringLen (str) - 1] = 0;
7090   }
7091   return str;
7092 }
7093 
7094 
/* Collects the text of every Dbtag in the list that satisfies scp.
 *
 * Returns a new ValNode list whose data.ptrvalue entries are the strings
 * produced by GetDbtagString (ownership of each string moves into the
 * returned list), or NULL if nothing matches.
 */
static ValNodePtr GetMultipleDbxrefStrings (ValNodePtr list, StringConstraintPtr scp)
{
  ValNodePtr vnp, val_list = NULL;
  CharPtr    cp;

  for (vnp = list; vnp != NULL; vnp = vnp->next) {
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
      ValNodeAddPointer (&val_list, 0, cp);
    } else {
      /* not kept: release the string instead of leaking it */
      cp = MemFree (cp);
    }
  }

  return val_list;
}
7109 
7110 
RemoveDbxrefString(ValNodePtr PNTR list,StringConstraintPtr scp)7111 static Boolean RemoveDbxrefString (ValNodePtr PNTR list, StringConstraintPtr scp)
7112 {
7113   ValNodePtr vnp, vnp_prev = NULL, vnp_next;
7114   CharPtr    cp;
7115   Boolean    rval = FALSE;
7116 
7117   if (list == NULL || *list == NULL) {
7118     return FALSE;
7119   }
7120 
7121   vnp = *list;
7122   while (vnp != NULL) {
7123     vnp_next = vnp->next;
7124     cp = GetDbtagString (vnp->data.ptrvalue);
7125     if (DoesStringMatchConstraint(cp, scp)) {
7126       if (vnp_prev == NULL) {
7127         *list = vnp->next;
7128       } else {
7129         vnp_prev->next = vnp->next;
7130       }
7131       vnp->next = NULL;
7132       vnp->data.ptrvalue = DbtagFree (vnp->data.ptrvalue);
7133       vnp = ValNodeFree (vnp);
7134       rval = TRUE;
7135     } else {
7136       vnp_prev = vnp;
7137     }
7138     vnp = vnp_next;
7139   }
7140   return rval;
7141 }
7142 
7143 
/* tRNA product names, terminated by a NULL entry.
 *
 * GetTrnaProductString indexes this table with (amino acid code - 64), so
 * entry 1 lines up with 'A', entry 2 with 'B', and so on through entry 26
 * for 'Z'.  Entry 0 ("tRNA-Gap") and entry 27 ("tRNA-TERM") fall outside
 * the letter range.
 */
static CharPtr trnaList [] = {
  "tRNA-Gap",
  "tRNA-Ala",
  "tRNA-Asx",
  "tRNA-Cys",
  "tRNA-Asp",
  "tRNA-Glu",
  "tRNA-Phe",
  "tRNA-Gly",
  "tRNA-His",
  "tRNA-Ile",
  "tRNA-Xle",
  "tRNA-Lys",
  "tRNA-Leu",
  "tRNA-Met",
  "tRNA-Asn",
  "tRNA-Pyl",
  "tRNA-Pro",
  "tRNA-Gln",
  "tRNA-Arg",
  "tRNA-Ser",
  "tRNA-Thr",
  "tRNA-Sec",
  "tRNA-Val",
  "tRNA-Trp",
  "tRNA-OTHER",
  "tRNA-Tyr",
  "tRNA-Glx",
  "tRNA-TERM",
  NULL
};
7175 
7176 
GetTrnaProductString(tRNAPtr trna)7177 static CharPtr GetTrnaProductString (tRNAPtr trna)
7178 {
7179   Uint1              aa;
7180   Uint1              from;
7181   SeqMapTablePtr     smtp;
7182   Uint2              idx;
7183   CharPtr            str = NULL;
7184 
7185   if (trna == NULL) {
7186     return NULL;
7187   }
7188   aa = 0;
7189   if (trna->aatype == 2) {
7190     aa = trna->aa;
7191   } else {
7192     from = 0;
7193     switch (trna->aatype) {
7194       case 0 :
7195         from = 0;
7196         break;
7197       case 1 :
7198         from = Seq_code_iupacaa;
7199         break;
7200       case 2 :
7201         from = Seq_code_ncbieaa;
7202         break;
7203       case 3 :
7204         from = Seq_code_ncbi8aa;
7205         break;
7206       case 4 :
7207         from = Seq_code_ncbistdaa;
7208         break;
7209       default:
7210         break;
7211     }
7212     smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
7213     if (smtp != NULL) {
7214       aa = SeqMapTableConvert (smtp, trna->aa);
7215       if (aa == 255 && from == Seq_code_iupacaa) {
7216         if (trna->aa == 'U') {
7217           aa = 'U';
7218         } else if (trna->aa == 'O') {
7219           aa = 'O';
7220         }
7221       }
7222     }
7223   }
7224   if (aa > 0 && aa != 255) {
7225     if (aa != '*') {
7226       idx = aa - (64 /* + shift */);
7227     } else {
7228       idx = 25;
7229     }
7230     if (idx > 0 && idx < 28) {
7231       str = trnaList [idx];
7232     }
7233   }
7234   return str;
7235 }
7236 
7237 
GetRNARefProductString(RnaRefPtr rrp,StringConstraintPtr scp)7238 NLM_EXTERN CharPtr GetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr scp)
7239 {
7240   CharPtr    str = NULL;
7241   RNAGenPtr  rgp;
7242 
7243   if (rrp == NULL || rrp->ext.choice == 0) {
7244     return NULL;
7245   }
7246 
7247   if (rrp->ext.choice == 1) {
7248     str = StringSave (rrp->ext.value.ptrvalue);
7249   } else if (rrp->ext.choice == 2) {
7250     str = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue));
7251   } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL) {
7252     if (!StringHasNoText (rgp->product)) {
7253       str = StringSave (rgp->product);
7254     }
7255   }
7256   if (!DoesStringMatchConstraint(str, scp)) {
7257     str = MemFree (str);
7258   }
7259 
7260   return str;
7261 
7262 }
7263 
7264 
GetRNAProductString(SeqFeatPtr sfp,StringConstraintPtr scp)7265 NLM_EXTERN CharPtr GetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
7266 {
7267   RnaRefPtr  rrp;
7268   RNAGenPtr  rgp;
7269   SeqMgrFeatContext context;
7270   CharPtr    str = NULL;
7271 
7272   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7273     return NULL;
7274   }
7275 
7276   rrp = sfp->data.value.ptrvalue;
7277   if (rrp->ext.choice == 0
7278       || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
7279       || (rrp->ext.choice == 1
7280           && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0
7281               || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
7282               || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
7283     str = GetFirstGBQualMatch (sfp->qual, "product", 0, scp);
7284   }
7285 
7286 
7287   if (str == NULL) {
7288     if (rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue)
7289         && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
7290         && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
7291         && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0) {
7292       str = StringSave (rrp->ext.value.ptrvalue);
7293     } else if (rrp->ext.choice == 2 && rrp->ext.value.ptrvalue != NULL) {
7294       if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL
7295           && !StringHasNoText (context.label)
7296           && StringCmp (context.label, "tRNA") != 0) {
7297         str = (CharPtr) MemNew (sizeof (Char) + (StringLen (context.label) + 6));
7298         sprintf (str, "tRNA-%s", context.label);
7299       }
7300     } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL) {
7301       if (!StringHasNoText (rgp->product)) {
7302         str = StringSave (rgp->product);
7303       }
7304     }
7305     if (!DoesStringMatchConstraint(str, scp)) {
7306       str = MemFree (str);
7307     }
7308   }
7309   return str;
7310 }
7311 
7312 
IsParseabletRNAName(CharPtr name_string)7313 static Boolean IsParseabletRNAName (CharPtr name_string)
7314 {
7315   if (StringHasNoText(name_string))
7316   {
7317     return TRUE;
7318   }
7319   else if (StringNICmp (name_string, "trna-", 5) != 0)
7320   {
7321     return FALSE;
7322   }
7323   else if (StringLen (name_string) != 8)
7324   {
7325     return FALSE;
7326   }
7327   else if (ParseTRnaString (name_string, NULL, NULL, TRUE) == 0)
7328   {
7329     return FALSE;
7330   }
7331   else
7332   {
7333     return TRUE;
7334   }
7335 }
7336 
7337 
SetRNARefProductString(RnaRefPtr rrp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)7338 NLM_EXTERN Boolean SetRNARefProductString (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
7339 {
7340   Boolean rval = FALSE;
7341   RNAGenPtr rgp;
7342   CharPtr   cp, tmp;
7343   tRNAPtr   trp;
7344   Boolean justTrnaText = FALSE;
7345   Uint1   codon [6];
7346 
7347   if (rrp == NULL) {
7348     return FALSE;
7349   }
7350   if (rrp->ext.choice == 0) {
7351     if (scp == NULL || scp->match_text == NULL) {
7352       if (rrp->type == 5 || rrp->type == 6 || rrp->type == 7 || rrp->type == 8 || rrp->type == 9 || rrp->type == 10) {
7353         rgp = RNAGenNew ();
7354         rgp->product = StringSave (new_val);
7355         rrp->ext.choice = 3;
7356         rrp->ext.value.ptrvalue = rgp;
7357       } else {
7358         rrp->ext.choice = 1;
7359         rrp->ext.value.ptrvalue = StringSave (new_val);
7360       }
7361       rval = TRUE;
7362     }
7363   } else if (rrp->ext.choice == 1) {
7364     cp = rrp->ext.value.ptrvalue;
7365     rval = SetStringValue (&cp, new_val, existing_text);
7366     rrp->ext.value.ptrvalue = cp;
7367   } else if (rrp->ext.choice == 3) {
7368     rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7369     rval = SetStringValue (&(rgp->product), new_val, existing_text);
7370   } else if (rrp->ext.choice == 2) {
7371     tmp = StringSaveNoNull (GetTrnaProductString (rrp->ext.value.ptrvalue));
7372 
7373     if (DoesStringMatchConstraint (tmp, scp)
7374         && SetStringValue (&tmp, new_val, existing_text)) {
7375       trp = (tRNAPtr) rrp->ext.value.ptrvalue;
7376       if (trp == NULL) {
7377         trp = MemNew (sizeof (tRNA));
7378         trp->aatype = 0;
7379         MemSet (trp->codon, 255, sizeof (trp->codon));
7380         trp->anticodon = NULL;
7381         rrp->ext.value.ptrvalue = trp;
7382       }
7383 
7384       if (!IsParseabletRNAName(tmp))
7385       {
7386         if (trp->anticodon == NULL
7387             && trp->codon[0] == 255
7388             && trp->codon[1] == 255
7389             && trp->codon[2] == 255
7390             && trp->codon[3] == 255
7391             && trp->codon[4] == 255
7392             && trp->codon[5] == 255)
7393         {
7394           trp = MemFree (trp);
7395           rrp->ext.choice = 1;
7396           rrp->ext.value.ptrvalue = tmp;
7397           tmp = NULL;
7398           rval = TRUE;
7399         }
7400       }
7401       else
7402       {
7403         trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE);
7404         trp->aatype = 2;
7405         rval = TRUE;
7406       }
7407       tmp = MemFree (tmp);
7408     }
7409   }
7410   return rval;
7411 }
7412 
7413 
SetRNAProductString(SeqFeatPtr sfp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)7414 NLM_EXTERN Boolean SetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
7415 {
7416   RnaRefPtr  rrp;
7417   RNAGenPtr  rgp;
7418   Boolean rval = FALSE;
7419   ValNode vn;
7420   CharPtr cp, tmp;
7421   tRNAPtr trp;
7422   Boolean justTrnaText = FALSE;
7423   Uint1   codon [6];
7424 
7425   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7426     return FALSE;
7427   }
7428 
7429   rrp = sfp->data.value.ptrvalue;
7430 
7431   if ((rrp->ext.choice == 0 && rrp->type != 5 && rrp->type != 6 && rrp->type != 7 && rrp->type != 8 && rrp->type != 9 && rrp->type != 10)
7432       || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
7433       || (rrp->ext.choice == 1
7434           && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0
7435               || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
7436               || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
7437     vn.choice = FeatQualChoice_legal_qual;
7438     vn.data.intvalue = Feat_qual_legal_product;
7439 
7440     rval = SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text);
7441   }
7442 
7443   if (!rval) {
7444     if (rrp->ext.choice == 0
7445         && (rrp->type == 5 || rrp->type == 6 || rrp->type == 7 || rrp->type == 8
7446             || rrp->type == 9 || rrp->type == 10)) {
7447       rrp->ext.choice = 3;
7448     }
7449     if ((rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)))
7450         && (scp == NULL || scp->match_text == NULL)) {
7451       rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
7452       rrp->ext.value.ptrvalue = StringSave (new_val);
7453       rrp->ext.choice = 1;
7454       rval = TRUE;
7455     } else if (rrp->ext.choice == 1
7456                 && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
7457                 && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
7458                 && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0
7459                 && DoesStringMatchConstraint (rrp->ext.value.ptrvalue, scp)) {
7460       cp = rrp->ext.value.ptrvalue;
7461       rval = SetStringValue (&cp, new_val, existing_text);
7462       rrp->ext.value.ptrvalue = cp;
7463       rval = TRUE;
7464     } else if (rrp->ext.choice == 3) {
7465       rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7466       if (rgp == NULL) {
7467         rgp = RNAGenNew ();
7468         rrp->ext.value.ptrvalue = rgp;
7469       }
7470       rval = SetStringValue (&(rgp->product), new_val, existing_text);
7471     } else if (rrp->ext.choice == 2) {
7472       tmp = GetRNAProductString (sfp, NULL);
7473       if (DoesStringMatchConstraint (tmp, scp)
7474           && SetStringValue (&tmp, new_val, existing_text)) {
7475         trp = (tRNAPtr) rrp->ext.value.ptrvalue;
7476         if (trp == NULL) {
7477           trp = MemNew (sizeof (tRNA));
7478           trp->aatype = 0;
7479           MemSet (trp->codon, 255, sizeof (trp->codon));
7480           trp->anticodon = NULL;
7481           rrp->ext.value.ptrvalue = trp;
7482         }
7483 
7484         if (!IsParseabletRNAName(tmp))
7485         {
7486           if (trp->anticodon == NULL
7487               && trp->codon[0] == 255
7488               && trp->codon[1] == 255
7489               && trp->codon[2] == 255
7490               && trp->codon[3] == 255
7491               && trp->codon[4] == 255
7492               && trp->codon[5] == 255)
7493           {
7494             trp = MemFree (trp);
7495             rrp->ext.choice = 1;
7496             rrp->ext.value.ptrvalue = tmp;
7497             tmp = NULL;
7498             rval = TRUE;
7499           }
7500           else
7501           {
7502             vn.choice = FeatQualChoice_legal_qual;
7503             vn.data.intvalue = Feat_qual_legal_product;
7504             if (SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text)) {
7505               trp->aa = 0;
7506               rval = TRUE;
7507             }
7508           }
7509         }
7510         else
7511         {
7512           trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE);
7513           trp->aatype = 2;
7514           rval = TRUE;
7515         }
7516         tmp = MemFree (tmp);
7517       }
7518     }
7519   }
7520   return rval;
7521 }
7522 
7523 
RemoveRNAProductString(SeqFeatPtr sfp,StringConstraintPtr scp)7524 NLM_EXTERN Boolean RemoveRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
7525 {
7526   RnaRefPtr  rrp;
7527   RNAGenPtr  rgp;
7528   Boolean    rval = FALSE;
7529 
7530   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7531     return FALSE;
7532   }
7533 
7534   rrp = sfp->data.value.ptrvalue;
7535   if (rrp->ext.choice == 0
7536       || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
7537       || (rrp->ext.choice == 1
7538           && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0
7539               || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
7540               || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
7541     rval = RemoveGBQualMatch (&(sfp->qual), "product", 0, scp);
7542   }
7543 
7544   if (!rval) {
7545     if (rrp->ext.choice == 1) {
7546       if (!StringHasNoText (rrp->ext.value.ptrvalue)
7547         && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
7548         && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
7549         && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0
7550         && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp)) {
7551         rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
7552         rrp->ext.choice = 0;
7553         rval = TRUE;
7554       }
7555     } else if (rrp->ext.choice == 3) {
7556       rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7557       if (rgp != NULL && !StringHasNoText (rgp->product)
7558           && DoesStringMatchConstraint (rgp->product, scp)) {
7559         rgp->product = MemFree (rgp->product);
7560         rval = TRUE;
7561       }
7562     }
7563   }
7564   return rval;
7565 }
7566 
7567 
7568 
7569 
RemovetRNACodons_Recognized(SeqFeatPtr sfp)7570 static Boolean RemovetRNACodons_Recognized (SeqFeatPtr sfp)
7571 {
7572   RnaRefPtr rrp;
7573   tRNAPtr   trp;
7574 
7575   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7576     return FALSE;
7577   }
7578 
7579   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
7580   if (rrp->ext.choice != 2) {
7581     return FALSE;
7582   }
7583   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
7584   if (trp == NULL) {
7585     return FALSE;
7586   }
7587 
7588   trp->codon [0] = 255;
7589   trp->codon [1] = 255;
7590   trp->codon [2] = 255;
7591   trp->codon [3] = 255;
7592   trp->codon [4] = 255;
7593   trp->codon [5] = 255;
7594 
7595   return TRUE;
7596 }
7597 
7598 
/*
Nucleotide ambiguity letters and the bases each one stands for.
This is the mapping held in s_AmbiguityChars below.

M          A or C
R          A or G
W          A or T
S          C or G
Y          C or T
K          G or T
V        A or C or G
H        A or C or T
D        A or G or T
B        C or G or T
X      G or A or T or C
N      G or A or T or C
*/
/* One entry of the ambiguity table: a letter and its expansion. */
typedef struct ambiguitychar {
  Char ch;                /* the ambiguity letter; 0 marks the end of the table */
  CharPtr replacements;   /* NUL-terminated string of the letters ch expands to */
} AmbiguityCharData, PNTR AmbiguityCharPtr;
7617 
7618 
/*
 * Table of ambiguity letters and their unambiguous expansions, used by
 * ExpandWobbleCodon.  Entries are scanned in order until the terminating
 * {'\0', NULL} sentinel is reached.
 * NOTE(review): 'X' is listed here but is not among the letters accepted by
 * ParseCodonsRecognizedFromCommaDelimitedList - confirm whether that is
 * intended.
 */
static AmbiguityCharData s_AmbiguityChars[] = {
  {'M', "AC"},
  {'R', "AG"},
  {'W', "AT"},
  {'S', "CG"},
  {'Y', "CT"},
  {'K', "GT"},
  {'V', "ACG"},
  {'H', "ACT"},
  {'D', "AGT"},
  {'B', "CGT"},
  {'X', "GATC"},
  {'N', "GATC"},
  {'\0', NULL}
};
7634 
7635 
ExpandWobbleCodon(CharPtr codon)7636 static ValNodePtr ExpandWobbleCodon (CharPtr codon)
7637 {
7638   ValNodePtr list = NULL, vnp, new_list;
7639   Int4 i, j, len;
7640   CharPtr this_codon, cp, new_codon;
7641   Boolean any;
7642 
7643   if (StringHasNoText (codon)) {
7644     return NULL;
7645   }
7646   len = StringLen (codon);
7647   ValNodeAddPointer (&list, 0, StringSave (codon));
7648 
7649   for (j = 0; j < len; j++) {
7650     new_list = NULL;
7651     for (vnp = list; vnp != NULL; vnp = vnp->next) {
7652       this_codon = vnp->data.ptrvalue;
7653       any = FALSE;
7654       for (i = 0; s_AmbiguityChars[i].ch != 0 && !any; i++) {
7655         if (this_codon[j] == s_AmbiguityChars[i].ch) {
7656           cp = s_AmbiguityChars[i].replacements;
7657           while (*cp != 0) {
7658             new_codon = StringSave (this_codon);
7659             new_codon[j] = *cp;
7660             ValNodeAddPointer (&new_list, 0, new_codon);
7661             cp++;
7662           }
7663           any = TRUE;
7664         }
7665       }
7666       if (!any) {
7667         ValNodeAddPointer (&new_list, 0, StringSave (this_codon));
7668       }
7669     }
7670     list = ValNodeFreeData (list);
7671     list = new_list;
7672   }
7673 
7674   for (vnp = list; vnp != NULL; vnp = vnp->next) {
7675     vnp->choice = IndexForCodon (vnp->data.ptrvalue, Seq_code_iupacna);
7676   }
7677   return list;
7678 }
7679 
7680 
ParseCodonsRecognizedFromCommaDelimitedList(CharPtr str,Uint1Ptr codons)7681 static Boolean ParseCodonsRecognizedFromCommaDelimitedList (CharPtr str, Uint1Ptr codons)
7682 {
7683   Int4 codon_num, k = 0, q;
7684   Char    ch;
7685   Boolean rval = TRUE;
7686   Uint1   codon[4];
7687   ValNodePtr wobble_list, vnp;
7688 
7689   if (StringHasNoText (str) || codons == NULL) {
7690     return FALSE;
7691   }
7692 
7693   for (codon_num = 0; codon_num < 6; codon_num++) {
7694     codons[codon_num] = 255;
7695   }
7696   codon_num = 0;
7697 
7698   while (isspace (*str)) {
7699     str++;
7700   }
7701 
7702   while (*str != 0 && codon_num < 6 && rval) {
7703     k = 0;
7704     q = 0;
7705     ch = str [k];
7706     while (ch != '\0' && q < 3 && rval) {
7707       ch = TO_UPPER (ch);
7708       if (StringChr ("ACGTUYNKMRYSWBVHD", ch) != NULL) {
7709         if (ch == 'U') {
7710           ch = 'T';
7711         }
7712         codon [q] = (Uint1) ch;
7713         q++;
7714       } else {
7715         rval = FALSE;
7716       }
7717       k++;
7718       ch = str [k];
7719     }
7720     if (q < 3 || isalpha (ch)) {
7721       rval = FALSE;
7722     }
7723     if (rval) {
7724       codon [q] = 0;
7725       if (q == 3) {
7726         wobble_list = ExpandWobbleCodon(codon);
7727         for (vnp = wobble_list; vnp != NULL && codon_num < 6 && rval; vnp = vnp->next) {
7728           if (vnp->choice == INVALID_RESIDUE) {
7729             rval = FALSE;
7730           } else {
7731             codons [codon_num++] = vnp->choice;
7732           }
7733         }
7734         if (vnp != NULL) {
7735           /* too many ambiguities */
7736           rval = FALSE;
7737         }
7738         wobble_list = ValNodeFreeData (wobble_list);
7739       }
7740       str += 3;
7741       while (isspace (*str)) {
7742         str++;
7743       }
7744       while (*str == ',') {
7745         str++;
7746       }
7747       while (isspace (*str)) {
7748         str++;
7749       }
7750     }
7751   }
7752   if (*str != 0) {
7753     rval = FALSE;
7754   }
7755   return rval;
7756 }
7757 
7758 
SettRNACodons_Recognized(SeqFeatPtr sfp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)7759 NLM_EXTERN Boolean SettRNACodons_Recognized (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
7760 {
7761   RnaRefPtr rrp;
7762   tRNAPtr   trp;
7763   Uint1     codon[6];
7764   Uint1     new_codons[6];
7765   Int4      codon_num, num_new, num_old = 0, i;
7766   Boolean   rval = FALSE, already_have;
7767 
7768   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7769     return FALSE;
7770   }
7771   if (StringHasNoText (new_val)) {
7772     return FALSE;
7773   }
7774 
7775   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
7776   if (rrp->ext.choice != 2) {
7777     return FALSE;
7778   }
7779   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
7780   if (trp == NULL) {
7781     return FALSE;
7782   }
7783 
7784   if (ParseCodonsRecognizedFromCommaDelimitedList (new_val, codon)) {
7785     switch (existing_text) {
7786       case ExistingTextOption_replace_old :
7787         for (codon_num = 0; codon_num < 6; codon_num++) {
7788           trp->codon[codon_num] = codon[codon_num];
7789         }
7790         rval = TRUE;
7791         break;
7792       case ExistingTextOption_append_semi :
7793       case ExistingTextOption_append_space :
7794       case ExistingTextOption_append_colon :
7795       case ExistingTextOption_append_comma :
7796       case ExistingTextOption_append_none :
7797       case ExistingTextOption_prefix_semi :
7798       case ExistingTextOption_prefix_space :
7799       case ExistingTextOption_prefix_colon :
7800       case ExistingTextOption_prefix_comma :
7801       case ExistingTextOption_prefix_none :
7802       case ExistingTextOption_add_qual :
7803         for (num_old = 0; num_old < 6 && trp->codon[num_old] != 255; num_old++) {
7804           new_codons[num_old] = trp->codon[num_old];
7805         }
7806         codon_num = num_old;
7807         rval = TRUE;
7808         for (num_new = 0; num_new < 6 && codon[num_new] != 255 && rval; num_new++) {
7809           already_have = FALSE;
7810           for (i = 0; i < codon_num && !already_have; i++) {
7811             if (codon[num_new] == new_codons[i]) {
7812               already_have = TRUE;
7813             }
7814           }
7815           if (!already_have) {
7816             if (codon_num < 6) {
7817               new_codons[codon_num] = codon[num_new];
7818               codon_num++;
7819             } else {
7820               rval = FALSE;
7821             }
7822           }
7823         }
7824         if (rval) {
7825           for (i = 0; i < codon_num; i++) {
7826             trp->codon[i] = new_codons[i];
7827           }
7828           while (codon_num < 6) {
7829             trp->codon[codon_num++] = 255;
7830           }
7831         }
7832         break;
7833       case ExistingTextOption_leave_old :
7834         if (trp->codon[0] == 255) {
7835           for (i = 0; i < 6; i++) {
7836             trp->codon[i] = codon[i];
7837           }
7838           rval = TRUE;
7839         }
7840         break;
7841     }
7842   }
7843   return TRUE;
7844 }
7845 
7846 
GettRNACodonsRecognized(SeqFeatPtr sfp,StringConstraintPtr scp)7847 static CharPtr GettRNACodonsRecognized (SeqFeatPtr sfp, StringConstraintPtr scp)
7848 {
7849   RnaRefPtr rrp;
7850   tRNAPtr   trp;
7851   Int4      j;
7852   Char      buf[31];
7853   Uint1     codon [4];
7854 
7855   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
7856     return NULL;
7857   }
7858 
7859   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
7860   if (rrp->ext.choice != 2) {
7861     return NULL;
7862   }
7863   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
7864   if (trp == NULL) {
7865     return NULL;
7866   }
7867 
7868   buf[0] = 0;
7869 
7870   for (j = 0; j < 6; j++) {
7871     if (trp->codon [j] < 64) {
7872             /* Note - it is important to set the fourth character in the codon array to NULL
7873                 * because CodonForIndex only fills in the three characters of actual codon,
7874                 * so if you StringCpy the codon array and the NULL character is not found after
7875                 * the three codon characters, you will write in memory you did not intend to.
7876                 */
7877             codon [3] = 0;
7878       if (CodonForIndex (trp->codon [j], Seq_code_iupacna, codon)) {
7879         if (buf[0] != 0) {
7880           StringCat (buf, ", ");
7881         }
7882         StringCat (buf, (CharPtr) codon);
7883       }
7884     }
7885   }
7886   if (buf[0] == 0) {
7887     return NULL;
7888   } else {
7889     return StringSave (buf);
7890   }
7891 }
7892 
7893 
SettmRNATagPeptide(RnaRefPtr rrp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)7894 NLM_EXTERN Boolean SettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
7895 {
7896   RNAGenPtr rgp;
7897   Boolean   rval = FALSE;
7898 
7899   if (rrp == NULL) {
7900     return FALSE;
7901   }
7902   if (rrp->ext.choice == 0) {
7903     rrp->ext.choice = 3;
7904   }
7905 
7906   if (rrp->ext.choice == 1) {
7907     rgp = RNAGenNew ();
7908     rgp->product = rrp->ext.value.ptrvalue;
7909     rrp->ext.value.ptrvalue = rgp;
7910     rrp->ext.choice = 3;
7911   }
7912   if (rrp->ext.choice == 3) {
7913     rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7914     if (rgp == NULL) {
7915       rgp = RNAGenNew ();
7916       rrp->ext.value.ptrvalue = rgp;
7917     }
7918     rval = SetStringInRNAQualList (&(rgp->quals), "tag_peptide", scp, new_val, existing_text);
7919   }
7920   return rval;
7921 }
7922 
7923 
GettmRNATagPeptide(RnaRefPtr rrp,StringConstraintPtr scp)7924 NLM_EXTERN CharPtr GettmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp)
7925 {
7926   RNAGenPtr rgp;
7927 
7928   if (rrp == NULL
7929       || rrp->ext.choice != 3
7930       || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) {
7931     return NULL;
7932   }
7933   return GetFirstRNAQualMatchName (rgp->quals, "tag_peptide", scp);
7934 }
7935 
7936 
RemovetmRNATagPeptide(RnaRefPtr rrp,StringConstraintPtr scp)7937 static Boolean RemovetmRNATagPeptide (RnaRefPtr rrp, StringConstraintPtr scp)
7938 {
7939   RNAGenPtr rgp;
7940 
7941   if (rrp == NULL
7942       || rrp->ext.choice != 3
7943       || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) {
7944     return FALSE;
7945   }
7946   return RemoveRNAQualMatch (&(rgp->quals), "tag_peptide", scp);
7947 }
7948 
7949 
SetncRNAClass(RnaRefPtr rrp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)7950 NLM_EXTERN Boolean SetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
7951 {
7952   RNAGenPtr rgp;
7953   Boolean   rval = FALSE;
7954 
7955   if (rrp == NULL) {
7956     return FALSE;
7957   }
7958   if (rrp->ext.choice == 0) {
7959     rrp->ext.choice = 3;
7960   }
7961 
7962   if (rrp->ext.choice == 1) {
7963     rgp = RNAGenNew ();
7964     rgp->product = rrp->ext.value.ptrvalue;
7965     rrp->ext.value.ptrvalue = rgp;
7966     rrp->ext.choice = 3;
7967   }
7968   if (rrp->ext.choice == 3) {
7969     rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
7970     if (rgp == NULL) {
7971       rgp = RNAGenNew ();
7972       rrp->ext.value.ptrvalue = rgp;
7973     }
7974     rval = SetStringValue (&(rgp->_class), new_val, existing_text);
7975   }
7976   return rval;
7977 }
7978 
7979 
GetncRNAClass(RnaRefPtr rrp,StringConstraintPtr scp)7980 NLM_EXTERN CharPtr GetncRNAClass (RnaRefPtr rrp, StringConstraintPtr scp)
7981 {
7982   RNAGenPtr rgp;
7983 
7984   if (rrp == NULL
7985       || rrp->ext.choice != 3
7986       || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) {
7987     return NULL;
7988   }
7989   if (DoesStringMatchConstraint (rgp->_class, scp)) {
7990     return StringSave (rgp->_class);
7991   } else {
7992     return NULL;
7993   }
7994 }
7995 
7996 
RemovencRNAClass(RnaRefPtr rrp,StringConstraintPtr scp)7997 static Boolean RemovencRNAClass (RnaRefPtr rrp, StringConstraintPtr scp)
7998 {
7999   RNAGenPtr rgp;
8000 
8001   if (rrp == NULL
8002       || rrp->ext.choice != 3
8003       || (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) == NULL) {
8004     return FALSE;
8005   }
8006   if (!StringHasNoText (rgp->_class) && DoesStringMatchConstraint (rgp->_class, scp)) {
8007     rgp->_class = MemFree (rgp->_class);
8008     return TRUE;
8009   } else {
8010     return FALSE;
8011   }
8012 }
8013 
8014 
ParseSimpleInterval(CharPtr str,BioseqPtr bsp,CharPtr PNTR end)8015 static SeqLocPtr ParseSimpleInterval (CharPtr str, BioseqPtr bsp, CharPtr PNTR end)
8016 {
8017   Boolean partial_left = FALSE, partial_right = FALSE;
8018   Int4    left_num, right_num, swap_num;
8019   SeqLocPtr slp = NULL;
8020   Uint1     strand = Seq_strand_plus;
8021 
8022   if (StringHasNoText (str)) {
8023     return NULL;
8024   }
8025 
8026   while (isspace (*str)) {
8027     str++;
8028   }
8029   if (*str == '<' || *str == '>') {
8030     partial_left = TRUE;
8031     str++;
8032   }
8033   if (!isdigit (*str)) {
8034     return NULL;
8035   }
8036   left_num = atoi (str);
8037   while (isdigit (*str)) {
8038     str++;
8039   }
8040   while (isspace (*str) || *str == '.' || *str == '-') {
8041     str++;
8042   }
8043   if (*str == '<' || *str == '>') {
8044     partial_right = TRUE;
8045     str++;
8046   }
8047   if (!isdigit (*str)) {
8048     return NULL;
8049   }
8050 
8051   right_num = atoi (str);
8052   while (isdigit (*str)) {
8053     str++;
8054   }
8055 
8056   if (left_num > right_num) {
8057     swap_num = left_num;
8058     left_num = right_num;
8059     right_num = swap_num;
8060     strand = Seq_strand_minus;
8061   }
8062 
8063   slp = SeqLocIntNew (left_num - 1, right_num - 1, strand, SeqIdDup (SeqIdFindWorst (bsp->id)));
8064   SetSeqLocPartial (slp, partial_left, partial_right);
8065 
8066   if (end != NULL) {
8067     *end = str;
8068   }
8069   return slp;
8070 }
8071 
8072 
/*
 * ComplementSeqLoc
 *
 * Moves a single-interval location to the minus strand, exchanging its two
 * partial flags as it does so.  Locations that are NULL, are not a
 * SEQLOC_INT, have no interval data, or are already on the minus strand are
 * left unchanged.
 */
static void ComplementSeqLoc (SeqLocPtr slp)
{
  SeqIntPtr interval;
  Boolean   partial5 = FALSE, partial3 = FALSE;

  if (slp == NULL || slp->choice != SEQLOC_INT || slp->data.ptrvalue == NULL) {
    return;
  }

  interval = (SeqIntPtr) slp->data.ptrvalue;
  if (interval->strand == Seq_strand_minus) {
    return;
  }

  /* read the flags, then write them back swapped */
  CheckSeqLocForPartial (slp, &partial5, &partial3);
  SetSeqLocPartial (slp, partial3, partial5);
  interval->strand = Seq_strand_minus;
}
8087 
8088 
ParseSimpleSeqLoc(CharPtr str,BioseqPtr bsp)8089 NLM_EXTERN SeqLocPtr ParseSimpleSeqLoc (CharPtr str, BioseqPtr bsp)
8090 {
8091   CharPtr cp, cp_next;
8092   SeqLocPtr slp = NULL, slp_first = NULL, slp_tmp;
8093   Boolean is_complement = FALSE;
8094 
8095   if (StringHasNoText (str) || bsp == NULL) {
8096     return NULL;
8097   }
8098 
8099   cp = str;
8100   while (isspace (*cp)) {
8101     cp ++;
8102   }
8103   while (*cp != 0) {
8104     is_complement = FALSE;
8105     if (StringNICmp (cp, "complement", 10) == 0) {
8106       cp += 10;
8107       is_complement = TRUE;
8108     } else if (StringNICmp (cp, "comp", 4) == 0) {
8109       cp += 4;
8110       is_complement = TRUE;
8111     }
8112     if (*cp == '(') {
8113       cp++;
8114     }
8115     slp_tmp = ParseSimpleInterval (cp, bsp, &cp_next);
8116     if (slp_tmp == NULL) {
8117       slp = SeqLocFree (slp);
8118       return NULL;
8119     }
8120     if (is_complement) {
8121       ComplementSeqLoc (slp_tmp);
8122     }
8123     if (slp == NULL) {
8124       slp = slp_tmp;
8125     } else if (slp->choice == SEQLOC_INT) {
8126       slp_first = slp;
8127       slp_first->next = slp_tmp;
8128       slp = ValNodeNew (NULL);
8129       slp->choice = SEQLOC_MIX;
8130       slp->data.ptrvalue = slp_first;
8131     } else {
8132       ValNodeLink ((ValNodePtr PNTR) slp->data.ptrvalue, slp_tmp);
8133     }
8134 
8135     cp = cp_next;
8136     while (isspace (*cp)) {
8137       cp++;
8138     }
8139     if (*cp == ')') {
8140       cp++;
8141     }
8142     while (isspace (*cp)) {
8143       cp++;
8144     }
8145     if (*cp == ',') {
8146       cp++;
8147     }
8148     while (isspace (*cp)) {
8149       cp++;
8150     }
8151   }
8152   if (*cp != 0) {
8153     slp = SeqLocFree (slp);
8154   }
8155   return slp;
8156 }
8157 
8158 
SetAnticodon(SeqFeatPtr sfp,StringConstraintPtr scp,CharPtr new_val,Uint2 existing_text)8159 static Boolean SetAnticodon (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
8160 {
8161   RnaRefPtr rrp;
8162   tRNAPtr   trp;
8163   Boolean   rval = FALSE;
8164   SeqLocPtr slp, slp_merge;
8165   BioseqPtr bsp;
8166 
8167   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
8168     return FALSE;
8169   }
8170   if (StringHasNoText (new_val)) {
8171     return FALSE;
8172   }
8173 
8174   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
8175   if (rrp->ext.choice != 2) {
8176     return FALSE;
8177   }
8178   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
8179   if (trp == NULL) {
8180     return FALSE;
8181   }
8182 
8183   if (trp->anticodon != NULL && existing_text == ExistingTextOption_leave_old) {
8184     return FALSE;
8185   }
8186 
8187   bsp = BioseqFindFromSeqLoc (sfp->location);
8188   if (bsp == NULL) {
8189     return FALSE;
8190   }
8191 
8192   slp = ParseSimpleSeqLoc (new_val, bsp);
8193   if (slp == NULL) {
8194     return FALSE;
8195   }
8196 
8197   if (trp->anticodon == NULL) {
8198     trp->anticodon = slp;
8199     rval = TRUE;
8200   } else if (existing_text == ExistingTextOption_replace_old) {
8201     trp->anticodon = SeqLocFree (trp->anticodon);
8202     trp->anticodon = slp;
8203     rval = TRUE;
8204   } else {
8205     slp_merge = SeqLocMerge (bsp, trp->anticodon, slp, FALSE, FALSE, FALSE);
8206     slp = SeqLocFree (slp);
8207     trp->anticodon = SeqLocFree (trp->anticodon);
8208     trp->anticodon = slp_merge;
8209     rval = TRUE;
8210   }
8211   return rval;
8212 }
8213 
8214 
GetIntervalString(SeqLocPtr slp)8215 static CharPtr GetIntervalString (SeqLocPtr slp)
8216 {
8217   CharPtr fmt = "%s%d..%s%d";
8218   CharPtr complement_fmt = "complement(%s%d..%s%d)";
8219   CharPtr str = NULL;
8220   SeqIntPtr sip;
8221   Boolean   partial5 = FALSE, partial3 = FALSE;
8222 
8223   if (slp == NULL || slp->choice != SEQLOC_INT || slp->data.ptrvalue == NULL) {
8224     return NULL;
8225   }
8226 
8227   sip = (SeqIntPtr) slp->data.ptrvalue;
8228 
8229   CheckSeqLocForPartial (slp, &partial5, &partial3);
8230 
8231   if (sip->strand == Seq_strand_minus) {
8232     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (complement_fmt) + 30));
8233     sprintf (str, complement_fmt, partial3 ? "<" : "",
8234                                   sip->from + 1,
8235                                   partial5 ? ">" : "",
8236                                   sip->to + 1);
8237   } else {
8238     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 30));
8239     sprintf (str, fmt, partial5 ? "<" : "",
8240                         sip->from + 1,
8241                         partial3 ? ">" : "",
8242                         sip->to + 1);
8243   }
8244   return str;
8245 }
8246 
8247 
GetAnticodonLocString(SeqFeatPtr sfp)8248 static CharPtr GetAnticodonLocString (SeqFeatPtr sfp)
8249 {
8250   RnaRefPtr rrp;
8251   tRNAPtr   trp;
8252   SeqLocPtr slp;
8253   CharPtr   str = NULL, tmp;
8254   ValNodePtr str_list = NULL, vnp;
8255   Int4       len = 0;
8256 
8257   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
8258     return NULL;
8259   }
8260 
8261   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
8262   if (rrp->ext.choice != 2) {
8263     return NULL;
8264   }
8265   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
8266   if (trp == NULL || trp->anticodon == NULL) {
8267     return NULL;
8268   }
8269 
8270   if (trp->anticodon->choice == SEQLOC_INT) {
8271     str = GetIntervalString (trp->anticodon);
8272   } else if (trp->anticodon->choice == SEQLOC_MIX) {
8273     for (slp = trp->anticodon->data.ptrvalue; slp != NULL; slp = slp->next) {
8274       tmp = GetIntervalString (slp);
8275       if (tmp == NULL) {
8276         str_list = ValNodeFreeData (str_list);
8277         return StringSave ("complex location");
8278       } else {
8279         len += StringLen (tmp) + 2;
8280         ValNodeAddPointer (&str_list, 0, tmp);
8281       }
8282     }
8283     str = (CharPtr) MemNew (sizeof (Char) * len);
8284     str[0] = 0;
8285     for (vnp = str_list; vnp != NULL; vnp = vnp->next) {
8286       StringCat (str, vnp->data.ptrvalue);
8287       if (vnp->next != NULL) {
8288         StringCat (str, ", ");
8289       }
8290     }
8291     str_list = ValNodeFreeData (str_list);
8292   }
8293   return str;
8294 }
8295 
8296 
8297 
GetProtRefForFeature(SeqFeatPtr sfp)8298 NLM_EXTERN ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
8299 {
8300   BioseqPtr  protbsp;
8301   SeqFeatPtr protsfp;
8302   ProtRefPtr prp = NULL;
8303   SeqFeatXrefPtr xref;
8304 
8305   if (sfp == NULL) return NULL;
8306 
8307   if (sfp->data.choice == SEQFEAT_PROT) {
8308     prp = (ProtRefPtr) sfp->data.value.ptrvalue;
8309   } else if (sfp->data.choice == SEQFEAT_CDREGION) {
8310     xref = sfp->xref;
8311     while (xref != NULL && xref->data.choice != SEQFEAT_PROT) {
8312       xref = xref->next;
8313     }
8314     if (xref != NULL) {
8315       prp = xref->data.value.ptrvalue;
8316     }
8317     if (prp == NULL && sfp->product != NULL) {
8318       protbsp = BioseqFindFromSeqLoc (sfp->product);
8319       protsfp = GetProtFeature (protbsp);
8320       if (protsfp != NULL) {
8321         prp = protsfp->data.value.ptrvalue;
8322       }
8323     }
8324   }
8325   return prp;
8326 }
8327 
8328 
GetGeneInfoForFeature(SeqFeatPtr sfp,GeneRefPtr PNTR p_grp,SeqFeatPtr PNTR p_gene)8329 NLM_EXTERN void GetGeneInfoForFeature (SeqFeatPtr sfp, GeneRefPtr PNTR p_grp, SeqFeatPtr PNTR p_gene)
8330 {
8331   GeneRefPtr grp = NULL;
8332   SeqFeatPtr gene = NULL;
8333   SeqMgrFeatContext fcontext;
8334 
8335   if (p_grp != NULL) {
8336     *p_grp = NULL;
8337   }
8338   if (p_gene != NULL) {
8339     *p_gene = NULL;
8340   }
8341 
8342   if (sfp == NULL) {
8343     return;
8344   }
8345   if (sfp->idx.subtype == FEATDEF_GENE) {
8346     grp = sfp->data.value.ptrvalue;
8347     gene = sfp;
8348   } else {
8349     grp = SeqMgrGetGeneXref (sfp);
8350     if (grp == NULL) {
8351       gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
8352       if (gene != NULL) {
8353         grp = gene->data.value.ptrvalue;
8354       }
8355     } else if (SeqMgrGeneIsSuppressed (grp)) {
8356       grp = NULL;
8357     }
8358   }
8359   if (p_grp != NULL) {
8360     *p_grp = grp;
8361   }
8362   if (p_gene != NULL) {
8363     *p_gene = gene;
8364   }
8365 }
8366 
8367 
GetCitationTextFromFeature(SeqFeatPtr sfp,StringConstraintPtr scp,ValNodePtr cit_list)8368 static CharPtr GetCitationTextFromFeature (SeqFeatPtr sfp, StringConstraintPtr scp, ValNodePtr cit_list)
8369 {
8370   SeqEntryPtr sep;
8371   BioseqPtr   bsp;
8372   ValNodePtr  list = NULL, vnp;
8373   CharPtr     rval = NULL;
8374   Int4        serial_number;
8375   Char        buf[100];
8376   ValNodePtr  psp;
8377 
8378   if (sfp == NULL || sfp->cit == NULL) {
8379     return NULL;
8380   }
8381 
8382   bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp);
8383 
8384   if (cit_list == NULL) {
8385     /* list not provided - must create now */
8386     sep = SeqMgrGetSeqEntryForData (bsp);
8387     list = GetCitListsForSeqEntry (sep);
8388     cit_list = list;
8389   }
8390 
8391   psp = sfp->cit->data.ptrvalue;
8392   for (vnp = psp; vnp != NULL && rval == NULL; vnp = vnp->next) {
8393 
8394     serial_number = GetCitationNumberForMinPub (bsp, vnp, cit_list);
8395     if (serial_number > -1) {
8396       sprintf (buf, "%d", serial_number);
8397       if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
8398         rval = StringSave (buf);
8399       }
8400     }
8401   }
8402 
8403   list = PubSerialNumberListFree (list);
8404 
8405   return rval;
8406 }
8407 
8408 
GetCodeBreakString(SeqFeatPtr sfp)8409 static CharPtr GetCodeBreakString (SeqFeatPtr sfp)
8410 {
8411   CdRegionPtr crp;
8412   ValNodePtr  list = NULL, vnp;
8413   BioseqPtr   bsp;
8414   Int4        len = 0;
8415   CharPtr     str = NULL;
8416 
8417   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION
8418       || (crp = (CdRegionPtr) sfp->data.value.ptrvalue) == NULL
8419       || crp->code_break == NULL) {
8420     return NULL;
8421   }
8422 
8423   bsp = BioseqFindFromSeqLoc (sfp->location);
8424 
8425   PrintFTCodeBreak (&list, crp->code_break, bsp);
8426 
8427   for (vnp = list; vnp != NULL; vnp = vnp->next) {
8428     if (StringNCmp (vnp->data.ptrvalue, "\t\t\ttransl_except\t", 17) == 0) {
8429       len += StringLen (vnp->data.ptrvalue) - 17;
8430     }
8431   }
8432   if (len > 0) {
8433     str = (CharPtr) MemNew (sizeof (Char) * (len + 1));
8434     str[0] = 0;
8435     for (vnp = list; vnp != NULL; vnp = vnp->next) {
8436       if (StringNCmp (vnp->data.ptrvalue, "\t\t\ttransl_except\t", 17) == 0) {
8437         StringCat (str, ((CharPtr) vnp->data.ptrvalue) + 17);
8438         if (vnp->next == NULL) {
8439           str[StringLen(str) - 1] = 0;
8440         } else {
8441           str[StringLen(str) - 1] = ';';
8442         }
8443       }
8444     }
8445   }
8446   list = ValNodeFreeData (list);
8447   return str;
8448 }
8449 
8450 
GetQualFromFeatureAnyType(SeqFeatPtr sfp,ValNodePtr field,StringConstraintPtr scp,BatchExtraPtr batch_extra)8451 static CharPtr GetQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
8452 {
8453   CharPtr   str = NULL;
8454   GeneRefPtr grp = NULL;
8455   ProtRefPtr prp = NULL;
8456   Int4      gbqual, subfield;
8457   SeqFeatPtr gene = NULL;
8458   CdRegionPtr crp;
8459   ValNodePtr  vnp;
8460   Char        buf[20];
8461   BioseqPtr   protbsp;
8462 
8463   if (sfp == NULL || field == NULL)
8464   {
8465     return NULL;
8466   }
8467 
8468   // for gene fields
8469   GetGeneInfoForFeature (sfp, &grp, &gene);
8470 
8471   // for protein fields
8472   prp = GetProtRefForFeature (sfp);
8473 
8474   /* fields common to all features */
8475   /* note, also known as comment */
8476   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
8477       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
8478   {
8479     if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
8480     {
8481       str = StringSave (sfp->comment);
8482     }
8483   }
8484   /* db-xref */
8485   if (str == NULL
8486       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
8487           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue))))
8488   {
8489     str = GetDbxrefString (sfp->dbxref, scp);
8490   }
8491   /* exception */
8492   if (str == NULL
8493       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
8494           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue))))
8495   {
8496     if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp))
8497     {
8498       str = StringSave (sfp->except_text);
8499     }
8500   }
8501   /* evidence */
8502   if (str == NULL
8503       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
8504           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue))))
8505   {
8506     if (sfp->exp_ev == 1)
8507     {
8508       str = StringSave ("experimental");
8509     }
8510     else if (sfp->exp_ev == 2)
8511     {
8512       str = StringSave ("non-experimental");
8513     }
8514     if (!DoesStringMatchConstraint(str, scp)) {
8515       str = MemFree (str);
8516     }
8517   }
8518 
8519   /* citation */
8520   if (str == NULL
8521       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
8522           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue))))
8523   {
8524     str = GetCitationTextFromFeature (sfp, scp, batch_extra == NULL ? NULL : batch_extra->cit_list);
8525   }
8526 
8527   /* location */
8528   if (str == NULL
8529       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
8530           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue))))
8531   {
8532     str = SeqLocPrintUseBestID (sfp->location);
8533   }
8534 
8535   /* pseudo */
8536   if (str == NULL
8537        && (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
8538             || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue)))
8539   {
8540     str = GetFirstGBQualMatch (sfp->qual, "pseudogene", 0, scp);
8541     if (str == NULL && sfp->pseudo) {
8542       str = StringSave ("unqualified");
8543     }
8544   }
8545 
8546   /* fields common to some features */
8547   /* product */
8548   if (str == NULL
8549       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
8550           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue))))
8551   {
8552     if (prp != NULL) {
8553       str = GetFirstValNodeStringMatch (prp->name, scp);
8554     } else if (sfp->data.choice == SEQFEAT_RNA) {
8555       str = GetRNAProductString (sfp, scp);
8556     }
8557   }
8558 
8559   /* Gene fields */
8560   /* locus */
8561   if (str == NULL
8562        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
8563            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
8564        && grp != NULL)
8565   {
8566     if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp))
8567     {
8568       str = StringSave (grp->locus);
8569     }
8570   }
8571   /* description */
8572   if (str == NULL
8573        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
8574            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
8575        && grp != NULL)
8576   {
8577     if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
8578     {
8579       str = StringSave (grp->desc);
8580     }
8581   }
8582   /* maploc */
8583   if (str == NULL
8584        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
8585            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
8586        && grp != NULL)
8587   {
8588     if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
8589     {
8590       str = StringSave (grp->maploc);
8591     }
8592   }
8593   /* allele */
8594   if (str == NULL
8595        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
8596            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
8597        && grp != NULL
8598        && sfp->idx.subtype != FEATDEF_variation)
8599   {
8600     if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp))
8601     {
8602       str = StringSave (grp->allele);
8603     }
8604   }
8605   /* locus_tag */
8606   if (str == NULL
8607        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
8608            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
8609        && grp != NULL)
8610   {
8611     if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
8612     {
8613       str = StringSave (grp->locus_tag);
8614     }
8615   }
8616   /* synonym */
8617   if (str == NULL
8618        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
8619            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
8620        && grp != NULL)
8621   {
8622     str = GetFirstValNodeStringMatch (grp->syn, scp);
8623   }
8624   /* gene comment */
8625   if (str == NULL
8626       && field->choice == FeatQualChoice_legal_qual
8627       && field->data.intvalue == Feat_qual_legal_gene_comment
8628       && gene != NULL
8629       && !StringHasNoText (gene->comment)
8630       && DoesStringMatchConstraint (gene->comment, scp)) {
8631     str = StringSave (gene->comment);
8632   }
8633 
8634 
8635   /* protein fields */
8636   /* note - product handled above */
8637   /* description */
8638   if (str == NULL
8639        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
8640            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
8641        && prp != NULL)
8642   {
8643     if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
8644       str = StringSave (prp->desc);
8645     }
8646   }
8647   /* ec_number */
8648   if (str == NULL
8649        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
8650            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
8651        && prp != NULL)
8652   {
8653     str = GetFirstValNodeStringMatch (prp->ec, scp);
8654   }
8655   /* activity */
8656   if (str == NULL
8657        && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity)
8658            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue)))
8659        && prp != NULL)
8660   {
8661     str = GetFirstValNodeStringMatch (prp->activity, scp);
8662   }
8663 
8664   /* coding region fields */
8665   /* transl_except */
8666   if (str == NULL
8667       && field->choice == FeatQualChoice_legal_qual
8668       && field->data.intvalue == Feat_qual_legal_transl_except
8669       && sfp->data.choice == SEQFEAT_CDREGION)
8670   {
8671     str = GetCodeBreakString (sfp);
8672   }
8673   /* transl_table */
8674   if (str == NULL
8675       && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
8676       && sfp->data.choice == SEQFEAT_CDREGION
8677       && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL)
8678   {
8679     if (crp->genetic_code != NULL && (vnp = crp->genetic_code->data.ptrvalue) != NULL
8680         && vnp->choice == 2) {
8681       sprintf (buf, "%d", vnp->data.intvalue);
8682       str = StringSave (buf);
8683     }
8684   }
8685   /* translation */
8686   if (str == NULL
8687       && field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_translation
8688       && sfp->data.choice == SEQFEAT_CDREGION)
8689   {
8690     if (sfp->product != NULL)
8691     {
8692       protbsp = BioseqFindFromSeqLoc (sfp->product);
8693       str = GetSequenceByBsp (protbsp);
8694     }
8695   }
8696 
8697   /* special RNA qualifiers */
8698   /* tRNA qualifiers */
8699   /* codon-recognized */
8700   if (str == NULL
8701       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized)
8702            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))) {
8703     str = GettRNACodonsRecognized (sfp, scp);
8704   }
8705   /* anticodon */
8706   if (str == NULL
8707       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon)
8708            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))) {
8709     str = GetAnticodonLocString (sfp);
8710   }
8711   /* tag-peptide */
8712   if (str == NULL
8713       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide)
8714            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))) {
8715     str = GettmRNATagPeptide (sfp->data.value.ptrvalue, scp);
8716   }
8717   /* ncRNA_class */
8718   if (str == NULL
8719       && ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class)
8720            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))) {
8721     str = GetncRNAClass (sfp->data.value.ptrvalue, scp);
8722   }
8723 
8724   /* codon-start */
8725   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start
8726       && sfp->data.choice == SEQFEAT_CDREGION)
8727   {
8728     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
8729     if (crp->frame == 1 || crp->frame == 0) {
8730       str = StringSave ("1");
8731     } else {
8732       str = (CharPtr) MemNew (sizeof (Char) * 15);
8733       sprintf (str, "%d", crp->frame);
8734     }
8735     if (!DoesStringMatchConstraint (str, scp)) {
8736       str = MemFree (str);
8737     }
8738   }
8739 
8740   /* special region qualifiers */
8741   if (sfp->idx.subtype == FEATDEF_REGION
8742       && field->choice == FeatQualChoice_legal_qual
8743       && field->data.intvalue == Feat_qual_legal_name
8744       && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) {
8745     str = StringSave (sfp->data.value.ptrvalue);
8746   }
8747 
8748   /* actual GenBank qualifiers */
8749   if (str == NULL)
8750   {
8751     if (field->choice == FeatQualChoice_legal_qual)
8752     {
8753       gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield);
8754       if (gbqual > -1) {
8755         str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, subfield, scp);
8756       } else {
8757         /* need to do something with non-qualifier qualifiers */
8758       }
8759     } else {
8760       str = GetFirstGBQualMatchConstraintName (sfp->qual, field->data.ptrvalue, scp);
8761     }
8762   }
8763   return str;
8764 }
8765 
8766 
GetQualFromFeatureEx(SeqFeatPtr sfp,FeatureFieldPtr field,StringConstraintPtr scp,BatchExtraPtr batch_extra)8767 NLM_EXTERN CharPtr GetQualFromFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
8768 {
8769   if (sfp == NULL || field == NULL || field->field == NULL)
8770   {
8771     return NULL;
8772   }
8773   if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
8774   {
8775     return NULL;
8776   }
8777   return GetQualFromFeatureAnyType (sfp, field->field, scp, batch_extra);
8778 
8779 }
8780 
8781 
GetQualFromFeature(SeqFeatPtr sfp,FeatureFieldPtr field,StringConstraintPtr scp)8782 NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
8783 {
8784   return GetQualFromFeatureEx (sfp, field, scp, NULL);
8785 }
8786 
8787 
RemoveCodeBreak(CdRegionPtr crp)8788 static Boolean RemoveCodeBreak (CdRegionPtr crp)
8789 
8790 {
8791   CodeBreakPtr  cbp, nextcbp;
8792 
8793   if (crp == NULL || crp->code_break == NULL) {
8794     return FALSE;
8795   }
8796 
8797   cbp = crp->code_break;
8798   while (cbp != NULL) {
8799     nextcbp = cbp->next;
8800     cbp->next = NULL;
8801     cbp = CodeBreakFree (cbp);
8802     cbp = nextcbp;
8803   }
8804   crp->code_break = NULL;
8805   return TRUE;
8806 }
8807 
8808 
RemoveQualFromFeatureAnyType(SeqFeatPtr sfp,ValNodePtr field,StringConstraintPtr scp)8809 static Boolean RemoveQualFromFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp)
8810 {
8811   Boolean     rval = FALSE;
8812   GeneRefPtr  grp = NULL;
8813   ProtRefPtr  prp = NULL;
8814   RnaRefPtr   rrp = NULL;
8815   CdRegionPtr crp;
8816   tRNAPtr     trp;
8817   Int4        gbqual, subfield;
8818   SeqFeatPtr  gene = NULL;
8819   SeqMgrFeatContext fcontext;
8820 
8821   if (sfp == NULL || field == NULL)
8822   {
8823     return FALSE;
8824   }
8825 
8826   /* for gene fields */
8827   if (sfp->idx.subtype == FEATDEF_GENE) {
8828     grp = sfp->data.value.ptrvalue;
8829     gene = sfp;
8830   } else {
8831     grp = SeqMgrGetGeneXref (sfp);
8832     if (grp == NULL) {
8833       gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
8834       if (gene != NULL) {
8835         grp = gene->data.value.ptrvalue;
8836       }
8837     } else if (SeqMgrGeneIsSuppressed (grp)) {
8838       grp = NULL;
8839     }
8840   }
8841 
8842   /* for protein fields */
8843   prp = GetProtRefForFeature (sfp);
8844 
8845   /* for RNA fields */
8846   if (sfp->data.choice == SEQFEAT_RNA) {
8847     rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
8848   }
8849 
8850   /* fields common to all features */
8851   /* note, also known as comment */
8852   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
8853       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
8854   {
8855     if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp))
8856     {
8857       sfp->comment = MemFree (sfp->comment);
8858       rval = TRUE;
8859     }
8860   }
8861   /* db-xref */
8862   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
8863       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue)))
8864   {
8865     rval = RemoveDbxrefString (&(sfp->dbxref), scp);
8866   }
8867   /* exception */
8868   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
8869           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))
8870   {
8871     if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp))
8872     {
8873       sfp->except_text = MemFree (sfp->except_text);
8874       sfp->excpt = FALSE;
8875       rval = TRUE;
8876     }
8877   }
8878   /* evidence */
8879   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
8880           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))
8881   {
8882     if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp))
8883         || (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) {
8884       sfp->exp_ev = 0;
8885       rval = TRUE;
8886     }
8887   }
8888 
8889   /* citation */
8890   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
8891       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))
8892   {
8893     if (sfp->cit != NULL) {
8894       sfp->cit = PubSetFree (sfp->cit);
8895       rval = TRUE;
8896     }
8897   }
8898 
8899   /* location */
8900   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
8901       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))
8902   {
8903     if (sfp->location != NULL) {
8904       sfp->location = SeqLocFree (sfp->location);
8905       rval = TRUE;
8906     }
8907   }
8908 
8909   /* pseudo */
8910   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
8911       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue)))
8912   {
8913     if (gene != NULL) {
8914       if (gene->pseudo) {
8915         gene->pseudo = FALSE;
8916         rval = TRUE;
8917       }
8918       rval |= RemoveGBQualMatch (&(gene->qual), "pseudogene", 0, NULL);
8919     }
8920     if (sfp->pseudo) {
8921       sfp->pseudo = FALSE;
8922       rval = TRUE;
8923     }
8924     rval |= RemoveGBQualMatch (&(sfp->qual), "pseudogene", 0, NULL);
8925     return rval;
8926   }
8927 
8928   /* fields common to some features */
8929   /* product */
8930   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
8931           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))
8932   {
8933     if (prp != NULL) {
8934       rval = RemoveValNodeStringMatch (&(prp->name), scp);
8935     } else if (sfp->data.choice == SEQFEAT_RNA) {
8936       rval = RemoveRNAProductString (sfp, scp);
8937     }
8938   }
8939 
8940   /* Gene fields */
8941   /* locus */
8942   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
8943        || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
8944       && grp != NULL)
8945   {
8946     if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) {
8947       grp->locus = MemFree (grp->locus);
8948       rval = TRUE;
8949     }
8950   }
8951   /* description */
8952   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
8953        || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
8954       && grp != NULL)
8955   {
8956     if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
8957     {
8958       grp->desc = MemFree (grp->desc);
8959       rval = TRUE;
8960     }
8961   }
8962   /* maploc */
8963   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
8964            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
8965        && grp != NULL)
8966   {
8967     if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
8968     {
8969       grp->maploc = MemFree (grp->maploc);
8970       rval = TRUE;
8971     }
8972   }
8973   /* allele */
8974   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
8975            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
8976       && grp != NULL
8977       && sfp->idx.subtype != FEATDEF_variation)
8978   {
8979     if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp))
8980     {
8981       grp->allele = MemFree (grp->allele);
8982       rval = TRUE;
8983     }
8984   }
8985   /* locus_tag */
8986   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
8987            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
8988        && grp != NULL)
8989   {
8990     if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
8991     {
8992       grp->locus_tag = MemFree (grp->locus_tag);
8993       rval = TRUE;
8994     }
8995   }
8996   /* synonym */
8997   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
8998            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
8999        && grp != NULL)
9000   {
9001     rval = RemoveValNodeStringMatch (&(grp->syn), scp);
9002   }
9003   /* gene comment */
9004   if (field->choice == FeatQualChoice_legal_qual
9005       && field->data.intvalue == Feat_qual_legal_gene_comment
9006       && gene != NULL
9007       && !StringHasNoText (gene->comment)
9008       && DoesStringMatchConstraint (gene->comment, scp)) {
9009     gene->comment = MemFree (gene->comment);
9010     rval = TRUE;
9011   }
9012 
9013   /* protein fields */
9014   /* note - product handled above */
9015   /* description */
9016   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
9017            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
9018        && prp != NULL)
9019   {
9020     if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
9021       prp->desc = MemFree (prp->desc);
9022       rval = TRUE;
9023     }
9024   }
9025   /* ec_number */
9026   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
9027            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
9028        && prp != NULL)
9029   {
9030     rval = RemoveValNodeStringMatch (&(prp->ec), scp);
9031   }
9032   /* activity */
9033   if (((field->choice == FeatQualChoice_legal_qual
9034         && (field->data.intvalue == Feat_qual_legal_activity
9035             || field->data.intvalue == Feat_qual_legal_function))
9036        || (field->choice == FeatQualChoice_illegal_qual
9037            && (DoesStringMatchConstraint ("activity", field->data.ptrvalue)
9038                || DoesStringMatchConstraint ("function", field->data.ptrvalue))))
9039       && prp != NULL)
9040   {
9041     rval = RemoveValNodeStringMatch (&(prp->activity), scp);
9042   }
9043 
9044   /* special coding region fields */
9045   /* transl_except */
9046   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
9047       && sfp->data.choice == SEQFEAT_CDREGION)
9048   {
9049     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
9050     rval = RemoveCodeBreak (crp);
9051   }
9052   /* transl_table */
9053   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
9054       && sfp->data.choice == SEQFEAT_CDREGION
9055       && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL)
9056   {
9057     if (crp->genetic_code != NULL) {
9058       crp->genetic_code = GeneticCodeFree (crp->genetic_code);
9059       rval = TRUE;
9060     }
9061   }
9062 
9063 
9064   /* special RNA fields */
9065   /* anticodon */
9066   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_anticodon)
9067            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue)))
9068        && rrp != NULL && rrp->ext.choice == 2)
9069   {
9070     trp = (tRNAPtr) rrp->ext.value.ptrvalue;
9071     if (trp != NULL && trp->anticodon != NULL) {
9072       trp->anticodon = SeqLocFree (trp->anticodon);
9073       rval = TRUE;
9074     }
9075   }
9076   /* codons recognized */
9077   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codons_recognized)
9078            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue)))
9079        && rrp != NULL && rrp->ext.choice == 2)
9080   {
9081     rval = RemovetRNACodons_Recognized (sfp);
9082   }
9083   /* tag_peptide */
9084   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_tag_peptide)
9085            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue)))
9086        && rrp != NULL && rrp->ext.choice == 3)
9087   {
9088     rval = RemovetmRNATagPeptide (rrp, scp);
9089   }
9090   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ncRNA_class)
9091            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue)))
9092        && rrp != NULL && rrp->ext.choice == 3)
9093   {
9094     rval = RemovencRNAClass (rrp, scp);
9095   }
9096 
9097   /* special region qualifiers */
9098   if (sfp->idx.subtype == FEATDEF_REGION
9099       && field->choice == FeatQualChoice_legal_qual
9100       && field->data.intvalue == Feat_qual_legal_name
9101       && !StringHasNoText (sfp->data.value.ptrvalue)
9102       && DoesStringMatchConstraint (sfp->data.value.ptrvalue, scp)) {
9103     sfp->data.value.ptrvalue = MemFree (sfp->data.value.ptrvalue);
9104     rval = TRUE;
9105   }
9106 
9107 
9108 
9109   if (!rval) {
9110     /* actual GenBank qualifiers */
9111     if (field->choice == FeatQualChoice_legal_qual)
9112     {
9113       gbqual = GetGBQualFromFeatQual (field->data.intvalue, &subfield);
9114       if (gbqual > -1) {
9115         rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, subfield, scp);
9116       } else {
9117         /* need to do something with non-qualifier qualifiers */
9118       }
9119     } else {
9120       rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->data.ptrvalue, scp);
9121     }
9122   }
9123 
9124   return rval;
9125 }
9126 
9127 
RemoveQualFromFeature(SeqFeatPtr sfp,FeatureFieldPtr field,StringConstraintPtr scp)9128 NLM_EXTERN Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
9129 {
9130   if (sfp == NULL || field == NULL || field->field == NULL)
9131   {
9132     return FALSE;
9133   }
9134   if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
9135   {
9136     return FALSE;
9137   }
9138 
9139   return RemoveQualFromFeatureAnyType (sfp, field->field, scp);
9140 }
9141 
9142 
ChooseBestFrame(SeqFeatPtr sfp)9143 static Boolean ChooseBestFrame (SeqFeatPtr sfp)
9144 {
9145   CdRegionPtr  crp;
9146   Uint1        new_frame = 0, i, orig_frame;
9147   ByteStorePtr bs;
9148   Int4         lens [3];
9149   Int4         max;
9150   Boolean      retval = TRUE;
9151 
9152   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;
9153 
9154   crp = sfp->data.value.ptrvalue;
9155   if (crp == NULL) return FALSE;
9156   orig_frame = crp->frame;
9157 
9158   max = 0;
9159   for (i = 1; i <= 3; i++) {
9160     crp->frame = i;
9161     bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
9162     lens[i - 1] = BSLen (bs);
9163     BSFree (bs);
9164     if (lens[i - 1] > max) {
9165       max = lens[i - 1];
9166       new_frame = i;
9167     }
9168   }
9169   for (i = 1; i <= 3; i++) {
9170     if (lens [i - 1] == max && i != new_frame) {
9171       retval = FALSE;
9172     }
9173   }
9174   if (retval) {
9175     crp->frame = new_frame;
9176   } else {
9177     crp->frame = orig_frame;
9178   }
9179   return retval;
9180 }
9181 
9182 
ChooseMatchingFrame(SeqFeatPtr sfp)9183 static Boolean ChooseMatchingFrame (SeqFeatPtr sfp)
9184 {
9185   CdRegionPtr  crp;
9186   BioseqPtr    protbsp;
9187   CharPtr      expected_translation, frame_translation;
9188   Uint1        new_frame = 0, i, orig_frame;
9189   ByteStorePtr bs;
9190   Boolean      retval = FALSE;
9191 
9192   if (sfp == NULL
9193       || sfp->data.choice != SEQFEAT_CDREGION
9194       || sfp->product == NULL
9195       || (protbsp = BioseqFindFromSeqLoc (sfp->product)) == NULL
9196       || (crp = sfp->data.value.ptrvalue) == NULL) {
9197     return FALSE;
9198   }
9199 
9200   expected_translation = GetSequenceByBsp (protbsp);
9201   if (StringHasNoText (expected_translation)) {
9202     expected_translation = MemFree (expected_translation);
9203     return FALSE;
9204   }
9205 
9206   orig_frame = crp->frame;
9207 
9208   for (i = 1; i <= 3 && !retval; i++) {
9209     crp->frame = i;
9210     bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
9211     frame_translation = BSMerge (bs, NULL);
9212     if (StringCmp (frame_translation, expected_translation) == 0) {
9213       new_frame = i;
9214       retval = TRUE;
9215     }
9216     BSFree (bs);
9217     frame_translation = MemFree (frame_translation);
9218   }
9219   expected_translation = MemFree (expected_translation);
9220 
9221   if (new_frame == 1 && orig_frame == 0) {
9222     new_frame = 0;
9223   }
9224 
9225   if (retval) {
9226     crp->frame = new_frame;
9227     if (new_frame == orig_frame) {
9228       /* didn't actually change the frame */
9229       retval = FALSE;
9230     }
9231   } else {
9232     crp->frame = orig_frame;
9233   }
9234 
9235   return retval;
9236 }
9237 
9238 
/* Creates a new gene feature on the Bioseq that sfp is located on, using
 * sfp's location as the starting location.  The gene location is replaced by
 * the result of SeqLocMerge when that call yields a different, non-NULL
 * location, and a fresh GeneRef is attached as the feature data.
 *
 * Returns the new gene feature, or NULL when sfp is NULL, sfp is itself a
 * gene, the Bioseq cannot be found, or the feature cannot be created.
 */
static SeqFeatPtr CreateGeneForFeature (SeqFeatPtr sfp)
{
  BioseqPtr  target;
  SeqFeatPtr new_gene;
  SeqLocPtr  merged;

  if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) {
    return NULL;
  }

  target = BioseqFindFromSeqLoc (sfp->location);
  if (target == NULL) {
    return NULL;
  }

  new_gene = CreateNewFeatureOnBioseq (target, SEQFEAT_GENE, sfp->location);
  if (new_gene == NULL) {
    return NULL;
  }

  merged = SeqLocMerge (target, new_gene->location, NULL, TRUE, FALSE, FALSE);
  if (merged != NULL && merged != new_gene->location) {
    SeqLocFree (new_gene->location);
    new_gene->location = merged;
  }
  new_gene->data.value.ptrvalue = GeneRefNew ();

  return new_gene;
}
9263 
9264 
9265 static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds);
9266 
9267 
SetCitationTextOnFeature(SeqFeatPtr sfp,StringConstraintPtr scp,CharPtr value,Uint2 existing_text,ValNodePtr cit_list)9268 static Boolean SetCitationTextOnFeature (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, ValNodePtr cit_list)
9269 {
9270   SeqEntryPtr sep;
9271   BioseqPtr   bsp;
9272   ValNodePtr  list = NULL, vnp;
9273   Boolean     rval = FALSE, already_present = FALSE;
9274   Int4        new_number, serial_number;
9275   ValNodePtr  min_pub, new_list;
9276 
9277   if (sfp == NULL) {
9278     return FALSE;
9279   }
9280 
9281   if (sfp->cit != NULL && existing_text == ExistingTextOption_leave_old) {
9282     return FALSE;
9283   }
9284 
9285   if (!StringIsAllDigits (value)) {
9286     return FALSE;
9287   }
9288 
9289   new_number = atoi (value);
9290 
9291   bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp);
9292 
9293   if (cit_list == NULL) {
9294     /* list not provided - must create now */
9295     sep = SeqMgrGetSeqEntryForData (bsp);
9296     list = GetCitListsForSeqEntry (sep);
9297     cit_list = list;
9298   }
9299 
9300   min_pub = GetMinPubForCitationNumber (bsp, new_number, cit_list);
9301   if (min_pub == NULL) {
9302     list = PubSerialNumberListFree (list);
9303     return FALSE;
9304   }
9305 
9306   if (existing_text == ExistingTextOption_replace_old) {
9307     sfp->cit = PubSetFree (sfp->cit);
9308     sfp->cit = ValNodeNew (NULL);
9309     sfp->cit->choice = 1;
9310     new_list = NULL;
9311     ValNodeLink (&new_list, AsnIoMemCopy (min_pub->data.ptrvalue, (AsnReadFunc) PubAsnRead, (AsnWriteFunc) PubAsnWrite));
9312     sfp->cit->data.ptrvalue = new_list;
9313     rval = TRUE;
9314   } else {
9315     for (vnp = sfp->cit->data.ptrvalue; vnp != NULL && !already_present; vnp = vnp->next) {
9316       serial_number = GetCitationNumberForMinPub (bsp, vnp, cit_list);
9317       if (serial_number == new_number) {
9318         already_present = TRUE;
9319       }
9320     }
9321     if (!already_present) {
9322       new_list = sfp->cit->data.ptrvalue;
9323       ValNodeLink (&new_list, AsnIoMemCopy (min_pub->data.ptrvalue, (AsnReadFunc) PubAsnRead, (AsnWriteFunc) PubAsnWrite));
9324       sfp->cit->data.ptrvalue = new_list;
9325       rval = TRUE;
9326     }
9327   }
9328 
9329   list = PubSerialNumberListFree (list);
9330 
9331   return rval;
9332 }
9333 
9334 
SetFeatureLocation(SeqFeatPtr sfp,CharPtr value,Uint2 existing_text)9335 static Boolean SetFeatureLocation (SeqFeatPtr sfp, CharPtr value, Uint2 existing_text)
9336 {
9337   SeqLocPtr  loc;
9338   Boolean    locmap;
9339   int        num_errs;
9340   Boolean    sitesmap;
9341   SeqIdPtr   sip;
9342   Boolean    rval = FALSE;
9343 
9344   sip = SeqLocId (sfp->location);
9345   loc = Nlm_gbparseint (value, &locmap, &sitesmap, &num_errs, sip);
9346   if (loc != NULL) {
9347     switch (existing_text) {
9348       case ExistingTextOption_replace_old:
9349         sfp->location = SeqLocFree (sfp->location);
9350         sfp->location = loc;
9351         loc = NULL;
9352         rval = TRUE;
9353         break;
9354       case ExistingTextOption_append_semi:
9355       case ExistingTextOption_append_space:
9356       case ExistingTextOption_append_colon:
9357       case ExistingTextOption_append_comma:
9358       case ExistingTextOption_append_none:
9359         SeqLocAdd (&(sfp->location), loc, FALSE, FALSE);
9360         SeqLocPackage (sfp->location);
9361         loc = NULL;
9362         rval = TRUE;
9363         break;
9364       case ExistingTextOption_prefix_semi:
9365       case ExistingTextOption_prefix_space:
9366       case ExistingTextOption_prefix_colon:
9367       case ExistingTextOption_prefix_comma:
9368       case ExistingTextOption_prefix_none:
9369         SeqLocAdd (&loc, sfp->location, FALSE, FALSE);
9370         SeqLocPackage (loc);
9371         sfp->location = loc;
9372         loc = NULL;
9373         rval = TRUE;
9374         break;
9375       case ExistingTextOption_leave_old:
9376         if (sfp->location == NULL) {
9377           sfp->location = loc;
9378           loc = NULL;
9379           rval = TRUE;
9380         }
9381         break;
9382     }
9383   }
9384   loc = SeqLocFree (loc);
9385   return rval;
9386 }
9387 
9388 
SetGeneticCode(CdRegionPtr crp,Int4 value)9389 static Boolean SetGeneticCode (CdRegionPtr crp, Int4 value)
9390 {
9391   ValNodePtr vnp;
9392 
9393   if (crp == NULL) {
9394     return FALSE;
9395   }
9396   if (crp->genetic_code != NULL) {
9397     crp->genetic_code = GeneticCodeFree (crp->genetic_code);
9398   }
9399   crp->genetic_code = GeneticCodeNew ();
9400   vnp = ValNodeNew (NULL);
9401   vnp->choice = 2;
9402   vnp->data.intvalue = value;
9403   crp->genetic_code->data.ptrvalue = vnp;
9404   return TRUE;
9405 }
9406 
9407 
SetQualOnFeatureAnyType(SeqFeatPtr sfp,ValNodePtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text,BatchExtraPtr batch_extra)9408 static Boolean SetQualOnFeatureAnyType (SeqFeatPtr sfp, ValNodePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
9409 {
9410   Boolean rval = FALSE;
9411   Boolean    matched_term = FALSE;
9412   GeneRefPtr grp = NULL;
9413   ProtRefPtr prp = NULL;
9414   CharPtr    tmp;
9415   CdRegionPtr crp;
9416   SeqFeatPtr  gene = NULL;
9417   SeqMgrFeatContext fcontext;
9418 
9419   if (sfp == NULL || field == NULL)
9420   {
9421     return FALSE;
9422   }
9423 
9424   // for gene fields
9425   if (sfp->idx.subtype == FEATDEF_GENE) {
9426     grp = sfp->data.value.ptrvalue;
9427     gene = sfp;
9428   } else {
9429     grp = SeqMgrGetGeneXref (sfp);
9430     if (grp == NULL) {
9431       gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
9432       if (gene != NULL) {
9433         grp = gene->data.value.ptrvalue;
9434       }
9435     }
9436   }
9437 
9438   // for protein fields
9439   prp = GetProtRefForFeature (sfp);
9440 
9441   /* fields common to all features */
9442   /* note, also known as comment */
9443   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_note)
9444       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->data.ptrvalue)))
9445   {
9446     if (DoesStringMatchConstraint(sfp->comment, scp))
9447     {
9448       rval = SetStringValue ( &(sfp->comment), value, existing_text);
9449     }
9450     matched_term = TRUE;
9451   }
9452   /* db-xref */
9453   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_db_xref)
9454           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->data.ptrvalue)))
9455   {
9456     rval = SetDbxrefString (&(sfp->dbxref), scp, value, existing_text);
9457   }
9458   /* exception */
9459   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_exception)
9460           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->data.ptrvalue)))
9461   {
9462     if (DoesStringMatchConstraint(sfp->except_text, scp))
9463     {
9464       rval = SetStringValue ( &(sfp->except_text), value, existing_text);
9465       if (StringHasNoText(sfp->except_text)) {
9466         sfp->excpt = FALSE;
9467       } else {
9468         sfp->excpt = TRUE;
9469       }
9470     }
9471     matched_term = TRUE;
9472   }
9473   /* evidence */
9474   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_evidence)
9475           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->data.ptrvalue)))
9476   {
9477     tmp = NULL;
9478     if (sfp->exp_ev == 1)
9479     {
9480       tmp = StringSave ("experimental");
9481     }
9482     else if (sfp->exp_ev == 2)
9483     {
9484       tmp = StringSave ("non-experimental");
9485     }
9486     if (DoesStringMatchConstraint(tmp, scp)) {
9487       rval = SetStringValue (&tmp, value, existing_text);
9488       if (rval) {
9489         rval = FALSE;
9490         if (StringICmp (tmp, "experimental") == 0) {
9491           sfp->exp_ev = 1;
9492           rval = TRUE;
9493         } else if (StringICmp (tmp, "non-experimental") == 0) {
9494           sfp->exp_ev = 2;
9495           rval = TRUE;
9496         } else if (StringHasNoText (tmp)) {
9497           sfp->exp_ev = 0;
9498           rval = TRUE;
9499         }
9500       }
9501     }
9502     tmp = MemFree (tmp);
9503   }
9504 
9505   /* citation */
9506   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_citation)
9507           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("citation", field->data.ptrvalue)))
9508   {
9509     rval = SetCitationTextOnFeature (sfp, scp, value, existing_text, batch_extra == NULL ? NULL : batch_extra->cit_list);
9510   }
9511 
9512   /* location */
9513   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_location)
9514           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("location", field->data.ptrvalue)))
9515   {
9516     rval = SetFeatureLocation (sfp, value, existing_text);
9517     return rval;
9518   }
9519 
9520   /* pseudo */
9521   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_pseudo)
9522       || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("pseudogene", field->data.ptrvalue)))
9523   {
9524     if (gene != NULL) {
9525       if (!gene->pseudo) {
9526         gene->pseudo = TRUE;
9527         rval = TRUE;
9528       }
9529       if (StringICmp (value, "Unqualified") != 0) {
9530         rval |= SetStringInGBQualList (&(gene->qual), field, scp, value, existing_text);
9531       }
9532       return rval;
9533     } else {
9534       if (!sfp->pseudo) {
9535         sfp->pseudo = TRUE;
9536         rval = TRUE;
9537       }
9538       if (StringICmp (value, "Unqualified") != 0) {
9539         rval |= SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
9540       }
9541       return rval;
9542     }
9543   }
9544 
9545   /* fields common to some features */
9546   /* product */
9547   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_product)
9548           || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->data.ptrvalue)))
9549   {
9550     if (prp != NULL) {
9551       rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
9552     } else if (sfp->data.choice == SEQFEAT_RNA) {
9553       rval = SetRNAProductString (sfp, scp, value, existing_text);
9554     }
9555     matched_term = TRUE;
9556   }
9557 
9558   /* Gene fields */
9559   /* locus */
9560   if ((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene)
9561            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->data.ptrvalue)))
9562   {
9563     if (grp == NULL && IsStringConstraintEmpty (scp))
9564     {
9565       /* create new gene feature */
9566       gene = CreateGeneForFeature (sfp);
9567       if (gene != NULL)
9568       {
9569         grp = (GeneRefPtr) gene->data.value.ptrvalue;
9570       }
9571     }
9572     if (grp != NULL && DoesStringMatchConstraint(grp->locus, scp))
9573     {
9574       rval = SetStringValue (&(grp->locus), value, existing_text);
9575     }
9576     matched_term = TRUE;
9577   }
9578 
9579   /* description */
9580   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_gene_description)
9581            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
9582        && grp != NULL)
9583   {
9584     if (DoesStringMatchConstraint(grp->desc, scp))
9585     {
9586       rval = SetStringValue (&(grp->desc), value, existing_text);
9587     }
9588     matched_term = TRUE;
9589   }
9590   /* maploc */
9591   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_map)
9592            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->data.ptrvalue)))
9593        && grp != NULL)
9594   {
9595     if (DoesStringMatchConstraint(grp->maploc, scp))
9596     {
9597       rval = SetStringValue (&(grp->maploc), value, existing_text);
9598     }
9599     matched_term = TRUE;
9600   }
9601   /* allele */
9602   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_allele)
9603            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->data.ptrvalue)))
9604       && grp != NULL
9605       && sfp->idx.subtype != FEATDEF_variation)
9606   {
9607     if (DoesStringMatchConstraint(grp->allele, scp))
9608     {
9609       rval = SetStringValue (&(grp->allele), value, existing_text);
9610     }
9611     matched_term = TRUE;
9612   }
9613   /* locus_tag */
9614   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_locus_tag)
9615            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->data.ptrvalue)))
9616        && grp != NULL)
9617   {
9618     if (DoesStringMatchConstraint(grp->locus_tag, scp))
9619     {
9620       rval = SetStringValue (&(grp->locus_tag), value, existing_text);
9621     }
9622     matched_term = TRUE;
9623   }
9624   /* synonym */
9625   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_synonym)
9626            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->data.ptrvalue)))
9627        && grp != NULL)
9628   {
9629     rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
9630     matched_term = TRUE;
9631   }
9632   /* gene comment */
9633   if (field->choice == FeatQualChoice_legal_qual
9634       && field->data.intvalue == Feat_qual_legal_gene_comment
9635       && gene != NULL) {
9636     rval = SetStringValue (&(gene->comment), value, existing_text);
9637     matched_term = TRUE;
9638   }
9639 
9640   /* protein fields */
9641   /* note - product handled above */
9642   /* description */
9643   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_description)
9644            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->data.ptrvalue)))
9645        && prp != NULL)
9646   {
9647     if (DoesStringMatchConstraint(prp->desc, scp)) {
9648       rval = SetStringValue (&(prp->desc), value, existing_text);
9649     }
9650   }
9651   /* ec_number */
9652   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_ec_number)
9653            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->data.ptrvalue)))
9654        && prp != NULL)
9655   {
9656     rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
9657   }
9658   /* activity */
9659   if (((field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_activity)
9660            || (field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->data.ptrvalue)))
9661        && prp != NULL)
9662   {
9663     rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
9664   }
9665 
9666   /* special coding region fields */
9667   /* codon start */
9668   /* note - if product existed before, it will be retranslated */
9669   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_codon_start
9670       && sfp->data.choice == SEQFEAT_CDREGION)
9671   {
9672     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
9673     if (StringICmp (value, "best") == 0)
9674     {
9675       rval = ChooseBestFrame (sfp);
9676     }
9677     else if (StringICmp (value, "match") == 0)
9678     {
9679       rval = ChooseMatchingFrame (sfp);
9680     }
9681     else if (StringCmp (value, "1") == 0)
9682     {
9683       crp->frame = 1;
9684       rval = TRUE;
9685     }
9686     else if (StringCmp (value, "2") == 0)
9687     {
9688       crp->frame = 2;
9689       rval = TRUE;
9690     }
9691     else if (StringCmp (value, "3") == 0)
9692     {
9693       crp->frame = 3;
9694       rval = TRUE;
9695     }
9696     if (rval && sfp->product != NULL) {
9697       AdjustProteinSequenceForReadingFrame (sfp);
9698     }
9699     matched_term = TRUE;
9700   }
9701   /* transl_except */
9702   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_except
9703       && sfp->data.choice == SEQFEAT_CDREGION)
9704   {
9705     crp = (CdRegionPtr) sfp->data.value.ptrvalue;
9706     if (crp->code_break != NULL && existing_text == ExistingTextOption_leave_old) {
9707       matched_term = TRUE;
9708     } else {
9709       if (crp->code_break != NULL && existing_text == ExistingTextOption_replace_old) {
9710         RemoveCodeBreak (crp);
9711       }
9712       rval = ParseCodeBreak (sfp, value, 0);
9713     }
9714   }
9715   /* transl_table */
9716   if (field->choice == FeatQualChoice_legal_qual && field->data.intvalue == Feat_qual_legal_transl_table
9717       && sfp->data.choice == SEQFEAT_CDREGION
9718       && (crp = (CdRegionPtr) sfp->data.value.ptrvalue) != NULL
9719       && StringIsAllDigits (value))
9720   {
9721     if (crp->genetic_code != NULL && existing_text == ExistingTextOption_leave_old) {
9722       matched_term = TRUE;
9723     } else {
9724       rval = SetGeneticCode (crp, atoi (value));
9725     }
9726   }
9727 
9728 
9729   /* special RNA fields */
9730   /* tRNA fields */
9731   if (sfp->idx.subtype == FEATDEF_tRNA
9732       && ((field->choice == FeatQualChoice_legal_qual
9733            && field->data.intvalue == Feat_qual_legal_codons_recognized)
9734           || (field->choice == FeatQualChoice_illegal_qual
9735            && DoesStringMatchConstraint ("codon-recognized", field->data.ptrvalue))))
9736   {
9737     rval = SettRNACodons_Recognized (sfp, scp, value, existing_text);
9738   }
9739 
9740   if (sfp->idx.subtype == FEATDEF_tRNA
9741       && ((field->choice == FeatQualChoice_legal_qual
9742            && field->data.intvalue == Feat_qual_legal_anticodon)
9743           || (field->choice == FeatQualChoice_illegal_qual
9744            && DoesStringMatchConstraint ("anticodon", field->data.ptrvalue))))
9745   {
9746     rval = SetAnticodon (sfp, scp, value, existing_text);
9747   }
9748 
9749   if (sfp->idx.subtype == FEATDEF_tmRNA
9750       && ((field->choice == FeatQualChoice_legal_qual
9751            && field->data.intvalue == Feat_qual_legal_tag_peptide)
9752           || (field->choice == FeatQualChoice_illegal_qual
9753            && DoesStringMatchConstraint ("tag-peptide", field->data.ptrvalue))))
9754   {
9755     rval = SettmRNATagPeptide (sfp->data.value.ptrvalue, scp, value, existing_text);
9756   }
9757 
9758   if (sfp->idx.subtype == FEATDEF_ncRNA
9759       && ((field->choice == FeatQualChoice_legal_qual
9760            && field->data.intvalue == Feat_qual_legal_ncRNA_class)
9761           || (field->choice == FeatQualChoice_illegal_qual
9762            && DoesStringMatchConstraint ("ncRNA_class", field->data.ptrvalue))))
9763   {
9764     rval = SetncRNAClass (sfp->data.value.ptrvalue, scp, value, existing_text);
9765     matched_term = TRUE;
9766   }
9767 
9768   /* special region qualifiers */
9769   if (sfp->idx.subtype == FEATDEF_REGION
9770       && field->choice == FeatQualChoice_legal_qual
9771       && field->data.intvalue == Feat_qual_legal_name
9772       && DoesStringMatchConstraint(sfp->data.value.ptrvalue, scp))
9773   {
9774     rval = SetStringValue ((CharPtr PNTR)(&(sfp->data.value.ptrvalue)), value, existing_text);
9775     matched_term = TRUE;
9776   }
9777 
9778   /* actual GenBank qualifiers */
9779   if (!rval && !matched_term)
9780   {
9781     rval = SetStringInGBQualList (&(sfp->qual), field, scp, value, existing_text);
9782   }
9783   return rval;
9784 }
9785 
9786 
SetQualOnFeatureEx(SeqFeatPtr sfp,FeatureFieldPtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text,BatchExtraPtr batch_extra)9787 static Boolean SetQualOnFeatureEx (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
9788 {
9789   if (sfp == NULL || field == NULL || field->field == NULL)
9790   {
9791     return FALSE;
9792   }
9793   if (field->type != Macro_feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
9794   {
9795     return FALSE;
9796   }
9797 
9798   return SetQualOnFeatureAnyType (sfp, field->field, scp, value, existing_text, batch_extra);
9799 }
9800 
9801 
SetQualOnFeature(SeqFeatPtr sfp,FeatureFieldPtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)9802 NLM_EXTERN Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
9803 {
9804   return SetQualOnFeatureEx (sfp, field, scp, value, existing_text, NULL);
9805 }
9806 
9807 
GetRNAQualFromFeature(SeqFeatPtr sfp,RnaQualPtr rq,StringConstraintPtr scp,BatchExtraPtr batch_extra)9808 NLM_EXTERN CharPtr GetRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, BatchExtraPtr batch_extra)
9809 {
9810   ValNode vn;
9811 
9812   if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type))
9813   {
9814     return NULL;
9815   }
9816 
9817   MemSet (&vn, 0, sizeof (ValNode));
9818   vn.choice = FeatQualChoice_legal_qual;
9819   vn.data.intvalue = GetFeatQualForRnaField (rq->field);
9820 
9821   return GetQualFromFeatureAnyType (sfp, &vn, scp, batch_extra);
9822 }
9823 
9824 
RemoveRNAQualFromFeature(SeqFeatPtr sfp,RnaQualPtr rq,StringConstraintPtr scp)9825 NLM_EXTERN Boolean RemoveRNAQualFromFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp)
9826 {
9827   ValNode vn;
9828 
9829   if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type))
9830   {
9831     return FALSE;
9832   }
9833 
9834   MemSet (&vn, 0, sizeof (ValNode));
9835   vn.choice = FeatQualChoice_legal_qual;
9836   vn.data.intvalue = GetFeatQualForRnaField (rq->field);
9837 
9838   return RemoveQualFromFeatureAnyType (sfp, &vn, scp);
9839 }
9840 
9841 
SetRNAQualOnFeature(SeqFeatPtr sfp,RnaQualPtr rq,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)9842 NLM_EXTERN Boolean SetRNAQualOnFeature (SeqFeatPtr sfp, RnaQualPtr rq, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
9843 {
9844   ValNode vn;
9845 
9846   if (sfp == NULL || rq == NULL || !DoesFeatureMatchRnaType(sfp, rq->type))
9847   {
9848     return FALSE;
9849   }
9850 
9851   MemSet (&vn, 0, sizeof (ValNode));
9852   vn.choice = FeatQualChoice_legal_qual;
9853   vn.data.intvalue = GetFeatQualForRnaField (rq->field);
9854 
9855   return SetQualOnFeatureAnyType (sfp, &vn, scp, value, existing_text, NULL);
9856 }
9857 
9858 
SortVnpByStringLenShortToLong(VoidPtr ptr1,VoidPtr ptr2)9859 static int LIBCALLBACK SortVnpByStringLenShortToLong (VoidPtr ptr1, VoidPtr ptr2)
9860 {
9861   ValNodePtr  vnp1;
9862   ValNodePtr  vnp2;
9863   Int4        len1, len2;
9864 
9865   if (ptr1 != NULL && ptr2 != NULL) {
9866     vnp1 = *((ValNodePtr PNTR) ptr1);
9867     vnp2 = *((ValNodePtr PNTR) ptr2);
9868     if (vnp1 != NULL && vnp2 != NULL) {
9869       len1 = StringLen (vnp1->data.ptrvalue);
9870       len2 = StringLen (vnp2->data.ptrvalue);
9871       if (len1 < len2) {
9872         return -1;
9873       } else if (len1 > len2) {
9874         return 1;
9875       } else {
9876         return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
9877       }
9878     }
9879   }
9880   return 0;
9881 }
9882 
9883 
SortVnpByStringLenLongToShort(VoidPtr ptr1,VoidPtr ptr2)9884 static int LIBCALLBACK SortVnpByStringLenLongToShort (VoidPtr ptr1, VoidPtr ptr2)
9885 {
9886   ValNodePtr  vnp1;
9887   ValNodePtr  vnp2;
9888   Int4        len1, len2;
9889 
9890   if (ptr1 != NULL && ptr2 != NULL) {
9891     vnp1 = *((ValNodePtr PNTR) ptr1);
9892     vnp2 = *((ValNodePtr PNTR) ptr2);
9893     if (vnp1 != NULL && vnp2 != NULL) {
9894       len1 = StringLen (vnp1->data.ptrvalue);
9895       len2 = StringLen (vnp2->data.ptrvalue);
9896       if (len1 < len2) {
9897         return 1;
9898       } else if (len1 > len2) {
9899         return -1;
9900       } else {
9901         return StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
9902       }
9903     }
9904   }
9905   return 0;
9906 }
9907 
9908 
SortProtNames(SeqFeatPtr sfp,Uint2 order)9909 static Boolean SortProtNames (SeqFeatPtr sfp, Uint2 order)
9910 {
9911   ProtRefPtr prp;
9912   Boolean    rval = FALSE;
9913 
9914   if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT
9915       || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL
9916       || prp->name == NULL
9917       || prp->name->next == NULL) {
9918     return FALSE;
9919   }
9920   switch (order) {
9921     case Sort_order_short_to_long:
9922       if (!ValNodeIsSorted(prp->name, SortVnpByStringLenShortToLong)) {
9923         prp->name = ValNodeSort (prp->name, SortVnpByStringLenShortToLong);
9924         rval = TRUE;
9925       }
9926       break;
9927     case Sort_order_long_to_short:
9928       if (!ValNodeIsSorted(prp->name, SortVnpByStringLenLongToShort)) {
9929         prp->name = ValNodeSort (prp->name, SortVnpByStringLenLongToShort);
9930         rval = TRUE;
9931       }
9932       break;
9933     case Sort_order_alphabetical:
9934       if (!ValNodeIsSorted(prp->name, SortVnpByStringCS)) {
9935         prp->name = ValNodeSort (prp->name, SortVnpByStringCS);
9936         rval = TRUE;
9937       }
9938       break;
9939   }
9940   return rval;
9941 }
9942 
9943 
SortQualOnFeature(SeqFeatPtr sfp,FeatureFieldPtr field,Uint2 order)9944 NLM_EXTERN Boolean SortQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, Uint2 order)
9945 {
9946   SeqFeatPtr prot = NULL;
9947   BioseqPtr  protbsp;
9948   SeqMgrFeatContext context;
9949   Boolean    rval = FALSE;
9950 
9951   if (sfp == NULL || field == NULL) {
9952     return FALSE;
9953   }
9954 
9955   if (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot) {
9956     if (field->field->choice == FeatQualChoice_legal_qual
9957         && field->field->data.intvalue == Feat_qual_legal_product) {
9958       if (sfp->data.choice == SEQFEAT_CDREGION) {
9959         protbsp = BioseqFindFromSeqLoc (sfp->product);
9960         prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context);
9961       } else if (sfp->idx.subtype == FEATDEF_PROT) {
9962         prot = sfp;
9963       }
9964       rval = SortProtNames (prot, order);
9965     }
9966   }
9967 
9968   return rval;
9969 }
9970 
9971 
/* Appends to 'list' a FieldType_feature_field entry for legal qualifier
 * 'qual' on the feature type that corresponds to 'featdef', followed by one
 * entry for every subfield of the matching GenBank qualifier that maps to a
 * legal qualifier.  Does nothing when 'list' is NULL.
 */
static void AddLegalFeatureField (ValNodePtr PNTR list, Uint2 featdef, Uint2 qual)
{
  FeatureFieldPtr entry;
  Int4            gb_qual, subfield_count, subfield, sub_qual;

  if (list == NULL) {
    return;
  }

  /* entry for the qualifier itself */
  entry = FeatureFieldNew ();
  entry->type = GetFeatureTypeFromFeatdef (featdef);
  ValNodeAddInt (&(entry->field), FeatQualChoice_legal_qual, qual);
  ValNodeAddPointer (list, FieldType_feature_field, entry);

  /* entries for its subfields; subfields are numbered starting at 1 */
  gb_qual = GetGBQualFromFeatQual (qual, NULL);
  subfield_count = NumGbQualSubfields (gb_qual);
  subfield = 1;
  while (subfield <= subfield_count) {
    sub_qual = GetFeatQualByGBQualAndSubfield (gb_qual, subfield);
    subfield++;
    if (sub_qual < 0) {
      /* a negative result is skipped: no entry is added for this subfield */
      continue;
    }
    entry = FeatureFieldNew ();
    entry->type = GetFeatureTypeFromFeatdef (featdef);
    ValNodeAddInt (&(entry->field), FeatQualChoice_legal_qual, sub_qual);
    ValNodeAddPointer (list, FieldType_feature_field, entry);
  }
}
9998 
9999 
GetFieldListFromFeature(SeqFeatPtr sfp)10000 static ValNodePtr GetFieldListFromFeature (SeqFeatPtr sfp)
10001 {
10002   GeneRefPtr grp = NULL;
10003   SeqFeatPtr gene = NULL;
10004   ProtRefPtr prp = NULL;
10005   ValNodePtr list = NULL;
10006   GBQualPtr  qual;
10007   Int4       qual_num;
10008 
10009   if (sfp == NULL)
10010   {
10011     return NULL;
10012   }
10013 
10014   // for gene fields
10015   GetGeneInfoForFeature (sfp, &grp, &gene);
10016 
10017   /* add gene-specific fields */
10018   if (grp != NULL) {
10019     if (!StringHasNoText (grp->locus)) {
10020       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene);
10021     }
10022     if (!StringHasNoText (grp->allele)) {
10023       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_allele);
10024     }
10025     if (!StringHasNoText (grp->desc)) {
10026       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_gene_description);
10027     }
10028     if (!StringHasNoText (grp->maploc)) {
10029       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_map);
10030     }
10031     if (!StringHasNoText (grp->locus_tag)) {
10032       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_locus_tag);
10033     }
10034     if (grp->syn != NULL) {
10035       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_synonym);
10036     }
10037   }
10038 
10039   /* add protein-specific fields */
10040   prp = GetProtRefForFeature (sfp);
10041   if (prp != NULL) {
10042     /* product name */
10043     if (prp->name != NULL) {
10044       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product);
10045     }
10046     /* protein description */
10047     if (!StringHasNoText (prp->desc)) {
10048       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_description);
10049     }
10050     /* ec_number */
10051     if (prp->ec != NULL) {
10052       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_ec_number);
10053     }
10054     /* activity */
10055     if (prp->activity != NULL) {
10056       AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_activity);
10057     }
10058   }
10059 
10060   /* fields common to all features */
10061   /* note, also known as comment */
10062   if (!StringHasNoText (sfp->comment)) {
10063     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_note);
10064   }
10065   /* db-xref */
10066   if (sfp->dbxref != NULL) {
10067     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_db_xref);
10068   }
10069   /* exception */
10070   if (!StringHasNoText (sfp->except_text)) {
10071     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_exception);
10072   }
10073   /* evidence */
10074   if (sfp->exp_ev > 0) {
10075     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_evidence);
10076   }
10077 
10078   /* citation */
10079   if (sfp->cit != NULL) {
10080     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_citation);
10081   }
10082 
10083   /* RNA specific */
10084   if (sfp->data.choice == SEQFEAT_RNA) {
10085     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_product);
10086   }
10087 
10088   /* coding regions */
10089   if (sfp->data.choice == SEQFEAT_CDREGION) {
10090     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_codon_start);
10091   }
10092 
10093   /* regions */
10094   if (sfp->idx.subtype == FEATDEF_REGION) {
10095     AddLegalFeatureField (&list, sfp->idx.subtype, Feat_qual_legal_name);
10096   }
10097 
10098   /* actual GenBank qualifiers */
10099   for (qual = sfp->qual; qual != NULL; qual = qual->next)
10100   {
10101     qual_num = GetFeatQualByName (qual->qual);
10102     if (qual_num > -1) {
10103       AddLegalFeatureField (&list, sfp->idx.subtype, qual_num);
10104     }
10105   }
10106   return list;
10107 }
10108 
10109 
10110 /* Functions for handling new PCR primer sets:
10111  * GetPrimerValueFromBioSource
10112  * GetMultiplePrimerValuesFromBioSource
10113  * RemovePrimerValueFromBioSource
10114  * SetPrimerValueInBioSource
10115 */
10116 
GetPrimerValueFromBioSource(BioSourcePtr biop,Int4 field,StringConstraintPtr constraint)10117 static CharPtr GetPrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
10118 {
10119   PCRReactionSetPtr ps;
10120   PCRPrimerPtr      pp;
10121   CharPtr str = NULL;
10122 
10123   if (biop == NULL) {
10124     return NULL;
10125   }
10126 
10127   ps = biop->pcr_primers;
10128   while (ps != NULL && str == NULL) {
10129     switch (field) {
10130       case Source_qual_fwd_primer_name:
10131         pp = ps->forward;
10132         while (pp != NULL && str == NULL) {
10133           if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
10134             str = StringSave (pp->name);
10135           }
10136           pp = pp->next;
10137         }
10138         break;
10139       case Source_qual_fwd_primer_seq:
10140         pp = ps->forward;
10141         while (pp != NULL && str == NULL) {
10142           if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
10143             str = StringSave (pp->seq);
10144           }
10145           pp = pp->next;
10146         }
10147         break;
10148       case Source_qual_rev_primer_name:
10149         pp = ps->reverse;
10150         while (pp != NULL && str == NULL) {
10151           if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
10152             str = StringSave (pp->name);
10153           }
10154           pp = pp->next;
10155         }
10156         break;
10157       case Source_qual_rev_primer_seq:
10158         pp = ps->reverse;
10159         while (pp != NULL && str == NULL) {
10160           if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
10161             str = StringSave (pp->seq);
10162           }
10163           pp = pp->next;
10164         }
10165         break;
10166     }
10167     ps = ps->next;
10168   }
10169   return str;
10170 }
10171 
10172 
GetMultiplePrimerValuesFromBioSource(BioSourcePtr biop,Int4 field,StringConstraintPtr constraint)10173 static ValNodePtr GetMultiplePrimerValuesFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
10174 {
10175   PCRReactionSetPtr ps;
10176   PCRPrimerPtr      pp;
10177   ValNodePtr        list = NULL;
10178 
10179   if (biop == NULL) {
10180     return NULL;
10181   }
10182 
10183   ps = biop->pcr_primers;
10184   while (ps != NULL) {
10185     switch (field) {
10186       case Source_qual_fwd_primer_name:
10187         pp = ps->forward;
10188         while (pp != NULL) {
10189           if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
10190             ValNodeAddPointer (&list, 0, StringSave (pp->name));
10191           }
10192           pp = pp->next;
10193         }
10194         break;
10195       case Source_qual_fwd_primer_seq:
10196         pp = ps->forward;
10197         while (pp != NULL) {
10198           if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
10199             ValNodeAddPointer (&list, 0, StringSave (pp->seq));
10200           }
10201           pp = pp->next;
10202         }
10203         break;
10204       case Source_qual_rev_primer_name:
10205         pp = ps->reverse;
10206         while (pp != NULL) {
10207           if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
10208             ValNodeAddPointer (&list, 0, StringSave (pp->name));
10209           }
10210           pp = pp->next;
10211         }
10212         break;
10213       case Source_qual_rev_primer_seq:
10214         pp = ps->reverse;
10215         while (pp != NULL) {
10216           if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
10217             ValNodeAddPointer (&list, 0, StringSave (pp->seq));
10218           }
10219           pp = pp->next;
10220         }
10221         break;
10222     }
10223     ps = ps->next;
10224   }
10225   return list;
10226 }
10227 
10228 
PCRPrimerIsEmpty(PCRPrimerPtr primer)10229 static Boolean PCRPrimerIsEmpty (PCRPrimerPtr primer)
10230 {
10231   if (primer == NULL) {
10232     return TRUE;
10233   } else if (StringHasNoText (primer->name) && StringHasNoText (primer->seq)) {
10234     return TRUE;
10235   } else {
10236     return FALSE;
10237   }
10238 }
10239 
10240 
PCRPrimerListIsEmpty(PCRPrimerPtr primer)10241 static Boolean PCRPrimerListIsEmpty (PCRPrimerPtr primer)
10242 {
10243   Boolean rval = TRUE;
10244 
10245   while (primer != NULL && rval) {
10246     rval = PCRPrimerIsEmpty(primer);
10247     primer = primer->next;
10248   }
10249   return rval;
10250 }
10251 
10252 
PCRReactionIsEmpty(PCRReactionPtr pr)10253 NLM_EXTERN Boolean PCRReactionIsEmpty (PCRReactionPtr pr)
10254 {
10255   if (pr == NULL) {
10256     return TRUE;
10257   } else if (PCRPrimerListIsEmpty(pr->forward) && PCRPrimerListIsEmpty(pr->reverse)) {
10258     return TRUE;
10259   } else {
10260     return FALSE;
10261   }
10262 }
10263 
10264 
RemoveNameFromPrimerList(PCRPrimerPtr PNTR pp_list,StringConstraintPtr constraint)10265 static Boolean RemoveNameFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint)
10266 {
10267   PCRPrimerPtr  pp, prev_pp = NULL, next_pp;
10268   Boolean       rval = FALSE;
10269 
10270   if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) {
10271     return FALSE;
10272   }
10273   while (pp != NULL) {
10274     if (!StringHasNoText (pp->name) && DoesStringMatchConstraint (pp->name, constraint)) {
10275       pp->name = MemFree (pp->name);
10276       rval = TRUE;
10277     }
10278     next_pp = pp->next;
10279     if (PCRPrimerIsEmpty(pp)) {
10280       pp->next = NULL;
10281       pp = PCRPrimerFree (pp);
10282       if (prev_pp == NULL) {
10283         *pp_list = next_pp;
10284       } else {
10285         prev_pp->next = next_pp;
10286       }
10287     } else {
10288       prev_pp = pp;
10289     }
10290     pp = next_pp;
10291   }
10292   return rval;
10293 }
10294 
10295 
RemoveSeqFromPrimerList(PCRPrimerPtr PNTR pp_list,StringConstraintPtr constraint)10296 static Boolean RemoveSeqFromPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint)
10297 {
10298   PCRPrimerPtr  pp, prev_pp = NULL, next_pp;
10299   Boolean       rval = FALSE;
10300 
10301   if (pp_list == NULL || (pp = (PCRPrimerPtr) *pp_list) == NULL) {
10302     return FALSE;
10303   }
10304   while (pp != NULL) {
10305     if (!StringHasNoText (pp->seq) && DoesStringMatchConstraint (pp->seq, constraint)) {
10306       pp->seq = MemFree (pp->seq);
10307       rval = TRUE;
10308     }
10309     next_pp = pp->next;
10310     if (PCRPrimerIsEmpty(pp)) {
10311       pp->next = NULL;
10312       pp = PCRPrimerFree (pp);
10313       if (prev_pp == NULL) {
10314         *pp_list = next_pp;
10315       } else {
10316         prev_pp->next = next_pp;
10317       }
10318     } else {
10319       prev_pp = pp;
10320     }
10321     pp = next_pp;
10322   }
10323   return rval;
10324 }
10325 
10326 
RemovePrimerValueFromBioSource(BioSourcePtr biop,Int4 field,StringConstraintPtr constraint)10327 static Boolean RemovePrimerValueFromBioSource (BioSourcePtr biop, Int4 field, StringConstraintPtr constraint)
10328 {
10329   PCRReactionSetPtr ps, prev_ps = NULL, next_ps;
10330   Boolean           rval = FALSE;
10331 
10332   if (biop == NULL) {
10333     return FALSE;
10334   }
10335 
10336   ps = biop->pcr_primers;
10337   while (ps != NULL) {
10338     switch (field) {
10339       case Source_qual_fwd_primer_name:
10340         rval |= RemoveNameFromPrimerList (&(ps->forward), constraint);
10341         break;
10342       case Source_qual_fwd_primer_seq:
10343         rval |= RemoveSeqFromPrimerList (&(ps->forward), constraint);
10344         break;
10345       case Source_qual_rev_primer_name:
10346         rval |= RemoveNameFromPrimerList (&(ps->reverse), constraint);
10347         break;
10348       case Source_qual_rev_primer_seq:
10349         rval |= RemoveSeqFromPrimerList (&(ps->reverse), constraint);
10350         break;
10351     }
10352     next_ps = ps->next;
10353     if (PCRReactionIsEmpty(ps)) {
10354       ps->next = NULL;
10355       ps = PCRReactionFree (ps);
10356       if (prev_ps == NULL) {
10357         biop->pcr_primers = next_ps;
10358       } else {
10359         prev_ps->next = next_ps;
10360       }
10361     } else {
10362       prev_ps = ps;
10363     }
10364     ps = next_ps;
10365   }
10366   return rval;
10367 }
10368 
10369 
IsCompoundPrimerValue(CharPtr value)10370 static Boolean IsCompoundPrimerValue (CharPtr value)
10371 {
10372   Int4 len;
10373 
10374   if (StringHasNoText (value)) {
10375     return FALSE;
10376   } else if (StringChr (value, ':') != NULL
10377     || StringChr (value, ',') != NULL) {
10378     return TRUE;
10379   }
10380   len = StringLen (value);
10381   if (*value == '(' && value[len - 1] == ')') {
10382     return TRUE;
10383   } else {
10384     return FALSE;
10385   }
10386 }
10387 
10388 
HasMultiplePrimerSets(CharPtr value)10389 static Boolean HasMultiplePrimerSets (CharPtr value)
10390 {
10391   if (StringChr (value, ',')) {
10392     return TRUE;
10393   } else {
10394     return FALSE;
10395   }
10396 }
10397 
10398 
GetPrimerSetComponents(CharPtr value)10399 static ValNodePtr GetPrimerSetComponents (CharPtr value)
10400 {
10401   CharPtr cp, last_cp, tmp, src, dst;
10402   ValNodePtr list = NULL;
10403 
10404   last_cp = value;
10405   for (cp = StringChr (value, ','); cp != NULL; cp = StringChr (last_cp, ',')) {
10406     tmp = (CharPtr) MemNew (sizeof (Char) * (cp - last_cp + 1));
10407     src = last_cp;
10408     dst = tmp;
10409     while (src < cp) {
10410       if (*src != '(' && *src != ')') {
10411         *dst = *src;
10412         dst++;
10413       }
10414       src++;
10415     }
10416     *dst = 0;
10417     ValNodeAddPointer (&list, 0, tmp);
10418     last_cp = cp + 1;
10419   }
10420   if (*last_cp != 0) {
10421     tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (last_cp) + 1));
10422     src = last_cp;
10423     dst = tmp;
10424     while (*src != 0) {
10425       if (*src != '(' && *src != ')') {
10426         *dst = *src;
10427         dst++;
10428       }
10429       src++;
10430     }
10431     *dst = 0;
10432     ValNodeAddPointer (&list, 0, tmp);
10433   }
10434   return list;
10435 }
10436 
10437 
GetPrimerElements(CharPtr value)10438 static ValNodePtr GetPrimerElements (CharPtr value)
10439 {
10440   CharPtr cp, last_cp, tmp;
10441   ValNodePtr list = NULL;
10442   Int4 len;
10443 
10444   last_cp = value;
10445   for (cp = StringChr (value, ':'); cp != NULL; cp = StringChr (last_cp, ':')) {
10446     len = cp - last_cp + 1;
10447     tmp = (CharPtr) MemNew (sizeof (Char) * len);
10448     StringNCpy (tmp, last_cp, len - 1);
10449     tmp[len - 1] = 0;
10450     ValNodeAddPointer (&list, 0, tmp);
10451     last_cp = cp + 1;
10452   }
10453   if (*last_cp != 0) {
10454     ValNodeAddPointer (&list, 0, StringSave (last_cp));
10455   }
10456   return list;
10457 }
10458 
10459 
OverwriteNameStringIntoPrimerList(CharPtr value,PCRPrimerPtr PNTR p_list)10460 static Boolean OverwriteNameStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list)
10461 {
10462   ValNodePtr elements, vnp;
10463   PCRPrimerPtr pp, prev_pp = NULL;
10464   Boolean any_change = FALSE;
10465 
10466   if (p_list == NULL) {
10467     return FALSE;
10468   }
10469 
10470   elements = GetPrimerElements (value);
10471   for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) {
10472     if (pp == NULL) {
10473       pp = PCRPrimerNew ();
10474       if (prev_pp == NULL) {
10475         *p_list = pp;
10476       } else {
10477         prev_pp->next = pp;
10478       }
10479       any_change = TRUE;
10480     }
10481     if (StringCmp (pp->name, vnp->data.ptrvalue) != 0) {
10482       pp->name = MemFree (pp->name);
10483       pp->name = vnp->data.ptrvalue;
10484       vnp->data.ptrvalue = NULL;
10485       any_change = TRUE;
10486     }
10487     prev_pp = pp;
10488     pp = pp->next;
10489   }
10490   while (pp != NULL) {
10491     if (!StringHasNoText (pp->name)) {
10492       any_change = TRUE;
10493     }
10494     pp->name = MemFree (pp->name);
10495     pp = pp->next;
10496   }
10497   elements = ValNodeFreeData (elements);
10498   return any_change;
10499 }
10500 
10501 
OverwriteSeqStringIntoPrimerList(CharPtr value,PCRPrimerPtr PNTR p_list)10502 static Boolean OverwriteSeqStringIntoPrimerList (CharPtr value, PCRPrimerPtr PNTR p_list)
10503 {
10504   ValNodePtr elements, vnp;
10505   PCRPrimerPtr pp, prev_pp = NULL;
10506   Boolean any_change = FALSE;
10507 
10508   if (p_list == NULL) {
10509     return FALSE;
10510   }
10511 
10512   elements = GetPrimerElements (value);
10513   for (vnp = elements, pp = *p_list; vnp != NULL; vnp = vnp->next) {
10514     if (pp == NULL) {
10515       pp = PCRPrimerNew ();
10516       if (prev_pp == NULL) {
10517         *p_list = pp;
10518       } else {
10519         prev_pp->next = pp;
10520       }
10521       any_change = TRUE;
10522     }
10523     if (StringCmp (pp->seq, vnp->data.ptrvalue) != 0) {
10524       pp->seq = MemFree (pp->seq);
10525       pp->seq = vnp->data.ptrvalue;
10526       vnp->data.ptrvalue = NULL;
10527       any_change = TRUE;
10528     }
10529     prev_pp = pp;
10530     pp = pp->next;
10531   }
10532   while (pp != NULL) {
10533     if (!StringHasNoText (pp->seq)) {
10534       any_change = TRUE;
10535     }
10536     pp->seq = MemFree (pp->seq);
10537     pp = pp->next;
10538   }
10539   elements = ValNodeFreeData (elements);
10540   return any_change;
10541 }
10542 
10543 
OverwriteFwdNameStringIntoPCRReactionSet(CharPtr value,PCRReactionPtr PNTR p_list)10544 static Boolean OverwriteFwdNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
10545 {
10546   ValNodePtr sets, vnp;
10547   PCRReactionPtr ps, prev_ps = NULL;
10548   Boolean any_change = FALSE;
10549 
10550   if (p_list == NULL) {
10551     return FALSE;
10552   }
10553 
10554   sets = GetPrimerSetComponents (value);
10555   for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) {
10556     if (ps == NULL) {
10557       ps = PCRReactionNew ();
10558       if (prev_ps == NULL) {
10559         *p_list = ps;
10560       } else {
10561         prev_ps->next = ps;
10562       }
10563       any_change = TRUE;
10564     }
10565     any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward));
10566     prev_ps = ps;
10567     ps = ps->next;
10568   }
10569   while (ps != NULL) {
10570     any_change |= RemoveNameFromPrimerList (&(ps->forward), NULL);
10571     ps = ps->next;
10572   }
10573   sets = ValNodeFreeData (sets);
10574   return any_change;
10575 }
10576 
10577 
OverwriteRevNameStringIntoPCRReactionSet(CharPtr value,PCRReactionPtr PNTR p_list)10578 static Boolean OverwriteRevNameStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
10579 {
10580   ValNodePtr sets, vnp;
10581   PCRReactionPtr ps, prev_ps = NULL;
10582   Boolean any_change = FALSE;
10583 
10584   if (p_list == NULL) {
10585     return FALSE;
10586   }
10587 
10588   sets = GetPrimerSetComponents (value);
10589   for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) {
10590     if (ps == NULL) {
10591       ps = PCRReactionNew ();
10592       if (prev_ps == NULL) {
10593         *p_list = ps;
10594       } else {
10595         prev_ps->next = ps;
10596       }
10597       any_change = TRUE;
10598     }
10599     any_change |= OverwriteNameStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse));
10600     prev_ps = ps;
10601     ps = ps->next;
10602   }
10603   while (ps != NULL) {
10604     any_change |= RemoveNameFromPrimerList (&(ps->reverse), NULL);
10605     ps = ps->next;
10606   }
10607   sets = ValNodeFreeData (sets);
10608   return any_change;
10609 }
10610 
10611 
OverwriteFwdSeqStringIntoPCRReactionSet(CharPtr value,PCRReactionPtr PNTR p_list)10612 static Boolean OverwriteFwdSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
10613 {
10614   ValNodePtr sets, vnp;
10615   PCRReactionPtr ps, prev_ps = NULL;
10616   Boolean any_change = FALSE;
10617 
10618   if (p_list == NULL) {
10619     return FALSE;
10620   }
10621 
10622   sets = GetPrimerSetComponents (value);
10623   for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) {
10624     if (ps == NULL) {
10625       ps = PCRReactionNew ();
10626       if (prev_ps == NULL) {
10627         *p_list = ps;
10628       } else {
10629         prev_ps->next = ps;
10630       }
10631       any_change = TRUE;
10632     }
10633     any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->forward));
10634     prev_ps = ps;
10635     ps = ps->next;
10636   }
10637   while (ps != NULL) {
10638     any_change |= RemoveSeqFromPrimerList (&(ps->forward), NULL);
10639     ps = ps->next;
10640   }
10641   sets = ValNodeFreeData (sets);
10642   return any_change;
10643 }
10644 
10645 
OverwriteRevSeqStringIntoPCRReactionSet(CharPtr value,PCRReactionPtr PNTR p_list)10646 static Boolean OverwriteRevSeqStringIntoPCRReactionSet (CharPtr value, PCRReactionPtr PNTR p_list)
10647 {
10648   ValNodePtr sets, vnp;
10649   PCRReactionPtr ps, prev_ps = NULL;
10650   Boolean any_change = FALSE;
10651 
10652   if (p_list == NULL) {
10653     return FALSE;
10654   }
10655 
10656   sets = GetPrimerSetComponents (value);
10657   for (vnp = sets, ps = *p_list; vnp != NULL; vnp = vnp->next) {
10658     if (ps == NULL) {
10659       ps = PCRReactionNew ();
10660       if (prev_ps == NULL) {
10661         *p_list = ps;
10662       } else {
10663         prev_ps->next = ps;
10664       }
10665       any_change = TRUE;
10666     }
10667     any_change |= OverwriteSeqStringIntoPrimerList (vnp->data.ptrvalue, &(ps->reverse));
10668     prev_ps = ps;
10669     ps = ps->next;
10670   }
10671   while (ps != NULL) {
10672     any_change |= RemoveSeqFromPrimerList (&(ps->reverse), NULL);
10673     ps = ps->next;
10674   }
10675   sets = ValNodeFreeData (sets);
10676   return any_change;
10677 }
10678 
10679 
SetNameInPrimerList(PCRPrimerPtr PNTR pp_list,StringConstraintPtr constraint,CharPtr value,Uint2 existing_text)10680 static Boolean SetNameInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
10681 {
10682   PCRPrimerPtr  pp, prev_pp = NULL;
10683   Boolean       rval = FALSE;
10684 
10685   if (pp_list == NULL) {
10686     return FALSE;
10687   }
10688   pp = *pp_list;
10689 
10690   while (pp != NULL) {
10691     if (DoesStringMatchConstraint (pp->name, constraint)) {
10692       rval = SetStringValue (&(pp->name), value, existing_text);
10693     }
10694     prev_pp = pp;
10695     pp = pp->next;
10696   }
10697   return rval;
10698 }
10699 
10700 
SetSeqInPrimerList(PCRPrimerPtr PNTR pp_list,StringConstraintPtr constraint,CharPtr value,Uint2 existing_text)10701 static Boolean SetSeqInPrimerList (PCRPrimerPtr PNTR pp_list, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
10702 {
10703   PCRPrimerPtr  pp, prev_pp = NULL;
10704   Boolean       rval = FALSE;
10705 
10706   if (pp_list == NULL) {
10707     return FALSE;
10708   }
10709   pp = *pp_list;
10710 
10711   while (pp != NULL) {
10712     if (DoesStringMatchConstraint (pp->seq, constraint)) {
10713       rval = SetStringValue (&(pp->seq), value, existing_text);
10714     }
10715     prev_pp = pp;
10716     pp = pp->next;
10717   }
10718   return rval;
10719 }
10720 
10721 
SetPrimerValueInBioSource(BioSourcePtr biop,Int4 field,StringConstraintPtr constraint,CharPtr value,Uint2 existing_text)10722 static Boolean SetPrimerValueInBioSource(BioSourcePtr biop, Int4 field, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
10723 {
10724   PCRReactionSetPtr ps, prev_ps = NULL;
10725   Boolean rval = FALSE;
10726 
10727   ps = biop->pcr_primers;
10728 
10729   if (IsCompoundPrimerValue(value)) {
10730     if (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL) {
10731       switch (field) {
10732         case Source_qual_fwd_primer_name:
10733           rval = OverwriteFwdNameStringIntoPCRReactionSet (value, &(biop->pcr_primers));
10734           break;
10735         case Source_qual_fwd_primer_seq:
10736           rval = OverwriteFwdSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers));
10737           break;
10738         case Source_qual_rev_primer_name:
10739           rval = OverwriteRevNameStringIntoPCRReactionSet (value, &(biop->pcr_primers));
10740           break;
10741         case Source_qual_rev_primer_seq:
10742           rval = OverwriteRevSeqStringIntoPCRReactionSet (value, &(biop->pcr_primers));
10743           break;
10744       }
10745     }
10746   } else {
10747     while (ps != NULL) {
10748       switch (field) {
10749         case Source_qual_fwd_primer_name:
10750           rval |= SetNameInPrimerList (&(ps->forward), constraint, value, existing_text);
10751           break;
10752         case Source_qual_fwd_primer_seq:
10753           rval |= SetSeqInPrimerList (&(ps->forward), constraint, value, existing_text);
10754           break;
10755         case Source_qual_rev_primer_name:
10756           rval |= SetNameInPrimerList (&(ps->reverse), constraint, value, existing_text);
10757           break;
10758         case Source_qual_rev_primer_seq:
10759           rval |= SetSeqInPrimerList (&(ps->reverse), constraint, value, existing_text);
10760           break;
10761       }
10762       prev_ps = ps;
10763       ps = ps->next;
10764     }
10765 
10766     if (IsStringConstraintEmpty (constraint) && !rval && (existing_text != ExistingTextOption_leave_old || biop->pcr_primers == NULL)) {
10767       if (prev_ps == NULL) {
10768         ps = PCRReactionSetNew ();
10769         biop->pcr_primers = ps;
10770       } else if ((PCRPrimerListIsEmpty(prev_ps->forward)
10771                && (field == Source_qual_fwd_primer_name || field == Source_qual_fwd_primer_seq))
10772               || (PCRPrimerListIsEmpty(prev_ps->reverse)
10773               && (field == Source_qual_rev_primer_name || field == Source_qual_rev_primer_seq))) {
10774         /* add to previous set */
10775         ps = prev_ps;
10776       } else {
10777         /* field is filled on previous, build a new one */
10778         ps = PCRReactionSetNew ();
10779         prev_ps->next = ps;
10780       }
10781       switch (field) {
10782         case Source_qual_fwd_primer_name:
10783           ps->forward = PCRPrimerNew ();
10784           ps->forward->name = StringSave (value);
10785           rval = TRUE;
10786           break;
10787         case Source_qual_fwd_primer_seq:
10788           ps->forward = PCRPrimerNew ();
10789           ps->forward->seq = StringSave (value);
10790           rval = TRUE;
10791           break;
10792         case Source_qual_rev_primer_name:
10793           ps->reverse = PCRPrimerNew ();
10794           ps->reverse->name = StringSave (value);
10795           rval = TRUE;
10796           break;
10797         case Source_qual_rev_primer_seq:
10798           ps->reverse = PCRPrimerNew ();
10799           ps->reverse->seq = StringSave (value);
10800           rval = TRUE;
10801           break;
10802       }
10803     }
10804   }
10805   return rval;
10806 }
10807 
10808 
10809 
10810 /* functions for source qualifiers */
10811 
HasTaxonomyID(BioSourcePtr biop)10812 NLM_EXTERN Boolean HasTaxonomyID (BioSourcePtr biop)
10813 {
10814   ValNodePtr  db;
10815   DbtagPtr    dbt;
10816   Boolean     rval = FALSE;
10817 
10818   if (biop == NULL || biop->org == NULL) {
10819     return FALSE;
10820   }
10821   for (db = biop->org->db; db != NULL && !rval; db = db->next) {
10822     dbt = (DbtagPtr) db->data.ptrvalue;
10823     if (dbt != NULL && dbt->db != NULL &&
10824       StringICmp (dbt->db, "taxon") == 0) {
10825       rval = TRUE;
10826     }
10827   }
10828   return rval;
10829 }
10830 
10831 
GetTaxonomyId(BioSourcePtr biop,StringConstraintPtr scp)10832 static CharPtr GetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp)
10833 {
10834   ValNodePtr  db;
10835   DbtagPtr    dbt;
10836   CharPtr     str = NULL;
10837   Char        buf[15];
10838 
10839   if (biop == NULL || biop->org == NULL) {
10840     return NULL;
10841   }
10842   for (db = biop->org->db; db != NULL && str == NULL; db = db->next) {
10843     dbt = (DbtagPtr) db->data.ptrvalue;
10844     if (dbt != NULL && dbt->db != NULL &&
10845       StringICmp (dbt->db, "taxon") == 0) {
10846       if (dbt->tag->id > 0) {
10847         sprintf (buf, "%d", dbt->tag->id);
10848         if (DoesStringMatchConstraint (buf, scp)) {
10849           str = StringSave (buf);
10850         }
10851       } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
10852         str = StringSave (dbt->tag->str);
10853       }
10854     }
10855   }
10856   return str;
10857 }
10858 
10859 
/* Collects the taxonomy IDs in a list of Dbtag entries that match 'scp'.
 *
 * Only entries whose database name is exactly "taxon" are examined.  A
 * positive integer tag is formatted as decimal text; otherwise the tag's
 * string is used when it has text.  Entries without a tag are skipped.
 * NOTE(review): the database name is compared case-sensitively here, while
 * HasTaxonomyID and GetTaxonomyId use StringICmp - confirm which is
 * intended.
 *
 * Returns a ValNode list of newly allocated strings (one per matching
 * entry), or NULL when nothing matches.  The caller owns the list.
 */
static ValNodePtr GetMultipleTaxidStrings (ValNodePtr list, StringConstraintPtr scp)
{
  ValNodePtr vnp, val_list = NULL;
  DbtagPtr   dbt;
  CharPtr    str;
  Char       buf[15];

  for (vnp = list; vnp != NULL; vnp = vnp->next) {
    /* start fresh for each entry: a stale pointer from an earlier match
     * would otherwise be added to the result a second time
     */
    str = NULL;
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt != NULL && dbt->tag != NULL && StringCmp (dbt->db, "taxon") == 0) {
      if (dbt->tag->id > 0) {
        /* cast so the argument matches %d whatever Int4 is defined as */
        sprintf (buf, "%d", (int) dbt->tag->id);
        if (DoesStringMatchConstraint (buf, scp)) {
          str = StringSave (buf);
        }
      } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
        str = StringSave (dbt->tag->str);
      }
      if (str != NULL) {
        ValNodeAddPointer (&val_list, 0, str);
      }
    }
  }

  return val_list;
}
10886 
10887 
SetTaxonomyId(BioSourcePtr biop,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)10888 static Boolean SetTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
10889 {
10890   CharPtr tmp;
10891   CharPtr fmt = "taxon:%s";
10892   Boolean rval;
10893 
10894   if (biop == NULL) {
10895     return FALSE;
10896   }
10897   if (biop->org == NULL) {
10898     biop->org = OrgRefNew();
10899   }
10900   tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (value) + StringLen (fmt)));
10901   sprintf (tmp, fmt, value == NULL ? "" : value);
10902   rval = SetDbxrefString (&(biop->org->db), scp, tmp, existing_text);
10903   tmp = MemFree (tmp);
10904   return rval;
10905 }
10906 
10907 
RemoveTaxonomyId(BioSourcePtr biop,StringConstraintPtr scp)10908 static Boolean RemoveTaxonomyId (BioSourcePtr biop, StringConstraintPtr scp)
10909 {
10910   ValNodePtr  db, db_prev = NULL, db_next;
10911   DbtagPtr    dbt;
10912   Boolean     rval = FALSE, do_remove;
10913   Char        buf[15];
10914 
10915   if (biop == NULL || biop->org == NULL) {
10916     return FALSE;
10917   }
10918   for (db = biop->org->db; db != NULL; db = db_next) {
10919     db_next = db->next;
10920     dbt = (DbtagPtr) db->data.ptrvalue;
10921     do_remove = FALSE;
10922     if (dbt != NULL && dbt->db != NULL &&
10923       StringICmp (dbt->db, "taxon") == 0) {
10924       if (dbt->tag->id > 0) {
10925         sprintf (buf, "%d", dbt->tag->id);
10926         if (DoesStringMatchConstraint (buf, scp)) {
10927           do_remove = TRUE;
10928         }
10929       } else if (!StringHasNoText (dbt->tag->str) && DoesStringMatchConstraint (dbt->tag->str, scp)) {
10930         do_remove = TRUE;
10931       }
10932     }
10933     if (do_remove) {
10934       if (db_prev == NULL) {
10935         biop->org->db = db_next;
10936       } else {
10937         db_prev->next = db_next;
10938       }
10939       db->next = NULL;
10940       db->data.ptrvalue = DbtagFree (db->data.ptrvalue);
10941       db = ValNodeFree (db);
10942       rval = TRUE;
10943     } else {
10944       db_prev = db;
10945     }
10946   }
10947   return rval;
10948 }
10949 
10950 
GetSourceQualFromBioSource(BioSourcePtr biop,SourceQualChoicePtr scp,StringConstraintPtr constraint)10951 NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
10952 {
10953   CharPtr str = NULL;
10954   SubSourcePtr ssp;
10955   OrgModPtr mod;
10956   Int4 orgmod_subtype = -1, subsrc_subtype = -1;
10957   Int4 subfield;
10958   ValNode vn;
10959   Char buf[15];
10960 
10961   if (biop == NULL || scp == NULL) return NULL;
10962 
10963   switch (scp->choice)
10964   {
10965     case SourceQualChoice_textqual:
10966       if (scp->data.intvalue == Source_qual_taxname) {
10967         if (biop->org != NULL && !StringHasNoText (biop->org->taxname)
10968             && DoesStringMatchConstraint (biop->org->taxname, constraint)) {
10969           str = StringSave (biop->org->taxname);
10970         }
10971       } else if (scp->data.intvalue == Source_qual_common_name) {
10972         if (biop->org != NULL && !StringHasNoText (biop->org->common)
10973             && DoesStringMatchConstraint (biop->org->common, constraint)) {
10974           str = StringSave (biop->org->common);
10975         }
10976       } else if (scp->data.intvalue == Source_qual_lineage) {
10977         if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage)
10978             && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) {
10979           str = StringSave (biop->org->orgname->lineage);
10980         }
10981       } else if (scp->data.intvalue == Source_qual_division) {
10982         if (biop->org != NULL && biop->org->orgname != NULL  && !StringHasNoText (biop->org->orgname->div)
10983             && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) {
10984           str = StringSave (biop->org->orgname->div);
10985         }
10986       } else if (scp->data.intvalue == Source_qual_dbxref) {
10987         if (biop->org != NULL) {
10988           str = GetDbxrefString (biop->org->db, constraint);
10989         }
10990       } else if (scp->data.intvalue == Source_qual_taxid) {
10991         str = GetTaxonomyId (biop, constraint);
10992       } else if (scp->data.intvalue == Source_qual_all_notes) {
10993         vn.choice = SourceQualChoice_textqual;
10994         vn.data.intvalue = Source_qual_subsource_note;
10995         vn.next = NULL;
10996         str = GetSourceQualFromBioSource (biop, &vn, constraint);
10997         if (str == NULL) {
10998           vn.data.intvalue = Source_qual_orgmod_note;
10999           str = GetSourceQualFromBioSource (biop, &vn, constraint);
11000         }
11001       } else if (scp->data.intvalue == Source_qual_all_quals || scp->data.intvalue == Source_qual_all_primers) {
11002         /* will not do */
11003       } else if (scp->data.intvalue == Source_qual_fwd_primer_name
11004                  || scp->data.intvalue == Source_qual_fwd_primer_seq
11005                  || scp->data.intvalue == Source_qual_rev_primer_name
11006                  || scp->data.intvalue == Source_qual_rev_primer_seq) {
11007         /* fetch from new primer object */
11008         str = GetPrimerValueFromBioSource (biop, scp->data.intvalue, constraint);
11009       } else {
11010         orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
11011         if (orgmod_subtype == -1) {
11012           subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield);
11013           for (ssp = biop->subtype; ssp != NULL && str == NULL; ssp = ssp->next) {
11014             if (ssp->subtype == subsrc_subtype) {
11015               if (StringHasNoText (ssp->name)) {
11016                 if (IsNonTextSourceQual (scp->data.intvalue)
11017                     && DoesStringMatchConstraint ("TRUE", constraint)) {
11018                   str = StringSave ("TRUE");
11019                 }
11020               } else {
11021                 if (subfield == 0) {
11022                   if (DoesStringMatchConstraint (ssp->name, constraint)) {
11023                     str = StringSave (ssp->name);
11024                   }
11025                 } else {
11026                   str = GetThreeFieldSubfield (ssp->name, subfield);
11027                   if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) {
11028                     str = MemFree (str);
11029                   }
11030                 }
11031               }
11032             }
11033           }
11034         } else {
11035           if (biop->org != NULL && biop->org->orgname != NULL) {
11036             for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) {
11037               if (mod->subtype == orgmod_subtype) {
11038                 if (StringHasNoText (mod->subname)) {
11039                   if (IsNonTextSourceQual (scp->data.intvalue)
11040                       && DoesStringMatchConstraint ("TRUE", constraint)) {
11041                     str = StringSave ("TRUE");
11042                   }
11043                 } else {
11044                   if (subfield == 0) {
11045                     if (DoesStringMatchConstraint (mod->subname, constraint)) {
11046                       str = StringSave (mod->subname);
11047                     }
11048                   } else {
11049                     str = GetThreeFieldSubfield (mod->subname, subfield);
11050                     if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) {
11051                       str = MemFree (str);
11052                     }
11053                   }
11054                 }
11055               }
11056             }
11057           }
11058         }
11059       }
11060       break;
11061     case SourceQualChoice_location:
11062       str = LocNameFromGenome (biop->genome);
11063       if (DoesStringMatchConstraint (str, constraint)) {
11064         str = StringSave (str);
11065       } else {
11066         str = NULL;
11067       }
11068       break;
11069     case SourceQualChoice_origin:
11070       str = OriginNameFromOrigin (biop->origin);
11071       if (DoesStringMatchConstraint (str, constraint)) {
11072         str = StringSave (str);
11073       } else {
11074         str = NULL;
11075       }
11076       break;
11077     case SourceQualChoice_gcode:
11078       if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) {
11079         sprintf (buf, "%d", biop->org->orgname->gcode);
11080         str = StringSave (buf);
11081       }
11082       break;
11083     case SourceQualChoice_mgcode:
11084       if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) {
11085         sprintf (buf, "%d", biop->org->orgname->mgcode);
11086         str = StringSave (buf);
11087       }
11088       break;
11089   }
11090   return str;
11091 }
11092 
11093 
GetMultipleSourceQualsFromBioSource(BioSourcePtr biop,SourceQualChoicePtr scp,StringConstraintPtr constraint)11094 NLM_EXTERN ValNodePtr GetMultipleSourceQualsFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
11095 {
11096   ValNodePtr val_list = NULL;
11097   CharPtr str = NULL;
11098   SubSourcePtr ssp;
11099   OrgModPtr mod;
11100   Int4 orgmod_subtype = -1, subsrc_subtype = -1;
11101   Int4 subfield;
11102   ValNode vn;
11103 
11104   if (biop == NULL || scp == NULL) return NULL;
11105 
11106   if (scp->choice == SourceQualChoice_textqual) {
11107     if (scp->data.intvalue == Source_qual_taxname) {
11108       if (biop->org != NULL && !StringHasNoText (biop->org->taxname)
11109           && DoesStringMatchConstraint (biop->org->taxname, constraint)) {
11110         ValNodeAddPointer (&val_list, 0, StringSave (biop->org->taxname));
11111       }
11112     } else if (scp->data.intvalue == Source_qual_common_name) {
11113       if (biop->org != NULL && !StringHasNoText (biop->org->common)
11114           && DoesStringMatchConstraint (biop->org->common, constraint)) {
11115         ValNodeAddPointer (&val_list, 0, StringSave (biop->org->common));
11116       }
11117     } else if (scp->data.intvalue == Source_qual_lineage) {
11118       if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage)
11119           && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) {
11120         ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->lineage));
11121       }
11122     } else if (scp->data.intvalue == Source_qual_division) {
11123       if (biop->org != NULL && biop->org->orgname != NULL  && !StringHasNoText (biop->org->orgname->div)
11124           && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) {
11125         ValNodeAddPointer (&val_list, 0, StringSave (biop->org->orgname->div));
11126       }
11127     } else if (scp->data.intvalue == Source_qual_dbxref) {
11128       if (biop->org != NULL) {
11129         ValNodeLink (&val_list, GetMultipleDbxrefStrings (biop->org->db, constraint));
11130       }
11131     } else if (scp->data.intvalue == Source_qual_taxid) {
11132       if (biop->org != NULL) {
11133         ValNodeLink (&val_list, GetMultipleTaxidStrings (biop->org->db, constraint));
11134       }
11135     } else if (scp->data.intvalue == Source_qual_fwd_primer_name
11136                  || scp->data.intvalue == Source_qual_fwd_primer_seq
11137                  || scp->data.intvalue == Source_qual_rev_primer_name
11138                  || scp->data.intvalue == Source_qual_rev_primer_seq) {
11139       /* fetch from new primer object */
11140       ValNodeLink (&val_list, GetMultiplePrimerValuesFromBioSource (biop, scp->data.intvalue, constraint));
11141     } else if (scp->data.intvalue == Source_qual_all_notes) {
11142       vn.choice = SourceQualChoice_textqual;
11143       vn.data.intvalue = Source_qual_subsource_note;
11144       vn.next = NULL;
11145       str = GetSourceQualFromBioSource (biop, &vn, constraint);
11146       if (str != NULL) {
11147         ValNodeAddPointer (&val_list, 0, str);
11148       }
11149       vn.data.intvalue = Source_qual_orgmod_note;
11150       str = GetSourceQualFromBioSource (biop, &vn, constraint);
11151       if (str != NULL) {
11152         ValNodeAddPointer (&val_list, 0, str);
11153       }
11154     } else if (scp->data.intvalue == Source_qual_all_quals
11155                || scp->data.intvalue == Source_qual_all_primers) {
11156       /* will not do */
11157     } else {
11158       orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
11159       if (orgmod_subtype == -1) {
11160         subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield);
11161         for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
11162           if (ssp->subtype == subsrc_subtype) {
11163             if (StringHasNoText (ssp->name)) {
11164               if (IsNonTextSourceQual (scp->data.intvalue)
11165                   && DoesStringMatchConstraint ("TRUE", constraint)) {
11166                 ValNodeAddPointer (&val_list, 0, StringSave ("TRUE"));
11167               }
11168             } else {
11169               if (subfield == 0) {
11170                 if (DoesStringMatchConstraint (ssp->name, constraint)) {
11171                   ValNodeAddPointer (&val_list, 0, StringSave (ssp->name));
11172                 }
11173               } else {
11174                 str = GetThreeFieldSubfield (ssp->name, subfield);
11175                 if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) {
11176                   str = MemFree (str);
11177                 } else {
11178                   ValNodeAddPointer (&val_list, 0, str);
11179                 }
11180               }
11181             }
11182           }
11183         }
11184       } else {
11185         if (biop->org != NULL && biop->org->orgname != NULL) {
11186           for (mod = biop->org->orgname->mod; mod != NULL && str == NULL; mod = mod->next) {
11187             if (mod->subtype == orgmod_subtype) {
11188               if (StringHasNoText (mod->subname)) {
11189                 if (IsNonTextSourceQual (scp->data.intvalue)
11190                     && DoesStringMatchConstraint ("TRUE", constraint)) {
11191                   ValNodeAddPointer (&val_list, 0, StringSave ("TRUE"));
11192                 }
11193               } else {
11194                 if (subfield == 0) {
11195                   if (DoesStringMatchConstraint (mod->subname, constraint)) {
11196                     ValNodeAddPointer (&val_list, 0, StringSave (mod->subname));
11197                   }
11198                 } else {
11199                   str = GetThreeFieldSubfield (mod->subname, subfield);
11200                   if (StringHasNoText (str) || !DoesStringMatchConstraint (str, constraint)) {
11201                     str = MemFree (str);
11202                   } else {
11203                     ValNodeAddPointer (&val_list, 0, str);
11204                   }
11205                 }
11206               }
11207             }
11208           }
11209         }
11210       }
11211     }
11212   } else {
11213     str = GetSourceQualFromBioSource (biop, scp, constraint);
11214     if (str != NULL) {
11215       ValNodeAddPointer (&val_list, 0, str);
11216     }
11217   }
11218   return val_list;
11219 }
11220 
11221 
RemoveAllSourceQualsFromBioSource(BioSourcePtr biop,StringConstraintPtr constraint)11222 static Boolean RemoveAllSourceQualsFromBioSource (BioSourcePtr biop, StringConstraintPtr constraint)
11223 {
11224   Int4 i;
11225   Boolean rval = FALSE;
11226   ValNode vn;
11227 
11228   vn.next = NULL;
11229   vn.choice = SourceQualChoice_textqual;
11230 
11231   for (i = 0; i < NUM_srcqual_scqual; i++) {
11232     if (srcqual_scqual[i].srcqual != Source_qual_all_quals
11233         && srcqual_scqual[i].srcqual != Source_qual_all_notes
11234         && srcqual_scqual[i].srcqual != Source_qual_all_primers) {
11235       vn.data.intvalue = srcqual_scqual[i].srcqual;
11236       rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11237     }
11238   }
11239   return rval;
11240 }
11241 
/* Lcl_RemoveOldName
 * Unlinks and frees every OrgMod of subtype ORGMOD_old_name from the OrgName
 * modifier list of orp. Does nothing when orp or orp->orgname is NULL.
 */
static void Lcl_RemoveOldName (OrgRefPtr orp)
{
  OrgModPtr *link;
  OrgModPtr  doomed;

  if (orp == NULL || orp->orgname == NULL) {
    return;
  }

  /* link always addresses the pointer that leads to the node under
   * inspection, so the list head needs no special case
   */
  link = &(orp->orgname->mod);
  while (*link != NULL) {
    doomed = *link;
    if (doomed->subtype != ORGMOD_old_name) {
      link = &(doomed->next);
      continue;
    }
    *link = doomed->next;
    doomed->next = NULL;
    OrgModFree (doomed);
  }
}
11273 
RemoveSourceQualFromBioSource(BioSourcePtr biop,SourceQualChoicePtr scp,StringConstraintPtr constraint)11274 NLM_EXTERN Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
11275 {
11276   SubSourcePtr ssp, ssp_prev = NULL, ssp_next;
11277   OrgModPtr mod, mod_prev = NULL, mod_next;
11278   Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield;
11279   CharPtr str, tmp;
11280   Boolean rval = FALSE, do_remove, does_match;
11281   ValNode vn;
11282 
11283   if (biop == NULL || scp == NULL) return FALSE;
11284 
11285   switch (scp->choice)
11286   {
11287     case SourceQualChoice_textqual:
11288       if (scp->data.intvalue == Source_qual_taxname) {
11289         if (biop->org != NULL && !StringHasNoText (biop->org->taxname)
11290             && DoesStringMatchConstraint (biop->org->taxname, constraint)) {
11291           biop->org->taxname = MemFree (biop->org->taxname);
11292           RemoveTaxRef (biop->org);
11293           Lcl_RemoveOldName (biop->org);
11294           rval = TRUE;
11295         }
11296       } else if (scp->data.intvalue == Source_qual_common_name) {
11297         if (biop->org != NULL && !StringHasNoText (biop->org->common)
11298             && DoesStringMatchConstraint (biop->org->common, constraint)) {
11299           biop->org->common = MemFree (biop->org->common);
11300           rval = TRUE;
11301         }
11302       } else if (scp->data.intvalue == Source_qual_lineage) {
11303         if (biop->org != NULL && biop->org->orgname != NULL && !StringHasNoText (biop->org->orgname->lineage)
11304             && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint)) {
11305           biop->org->orgname->lineage = MemFree (biop->org->orgname->lineage);
11306           rval = TRUE;
11307         }
11308       } else if (scp->data.intvalue == Source_qual_division) {
11309         if (biop->org != NULL && biop->org->orgname != NULL  && !StringHasNoText (biop->org->orgname->div)
11310             && DoesStringMatchConstraint (biop->org->orgname->div, constraint)) {
11311           biop->org->orgname->div = MemFree (biop->org->orgname->div);
11312           rval = TRUE;
11313         }
11314       } else if (scp->data.intvalue == Source_qual_dbxref) {
11315         if (biop->org != NULL) {
11316           rval = RemoveDbxrefString (&(biop->org->db), constraint);
11317         }
11318       } else if (scp->data.intvalue == Source_qual_taxid) {
11319         rval = RemoveTaxonomyId (biop, constraint);
11320       } else if (scp->data.intvalue == Source_qual_fwd_primer_name
11321                    || scp->data.intvalue == Source_qual_fwd_primer_seq
11322                    || scp->data.intvalue == Source_qual_rev_primer_name
11323                    || scp->data.intvalue == Source_qual_rev_primer_seq) {
11324         /* remove from new primer object */
11325         rval = RemovePrimerValueFromBioSource (biop, scp->data.intvalue, constraint);
11326       } else if (scp->data.intvalue == Source_qual_all_notes) {
11327         vn.choice = SourceQualChoice_textqual;
11328         vn.data.intvalue = Source_qual_subsource_note;
11329         vn.next = NULL;
11330         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11331         vn.data.intvalue = Source_qual_orgmod_note;
11332         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11333       } else if (scp->data.intvalue == Source_qual_all_primers) {
11334         vn.choice = SourceQualChoice_textqual;
11335         vn.data.intvalue = Source_qual_fwd_primer_name;
11336         vn.next = NULL;
11337         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11338         vn.data.intvalue = Source_qual_rev_primer_name;
11339         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11340         vn.data.intvalue = Source_qual_fwd_primer_seq;
11341         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11342         vn.data.intvalue = Source_qual_rev_primer_seq;
11343         rval |= RemoveSourceQualFromBioSource (biop, &vn, constraint);
11344       } else if (scp->data.intvalue == Source_qual_all_quals) {
11345         rval |= RemoveAllSourceQualsFromBioSource (biop, constraint);
11346       } else {
11347         orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
11348         if (orgmod_subtype == -1) {
11349           subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield);
11350           ssp = biop->subtype;
11351           while (ssp != NULL) {
11352             ssp_next = ssp->next;
11353             do_remove = FALSE;
11354             if (ssp->subtype == subsrc_subtype) {
11355               if (subfield == 0) {
11356                 if (DoesStringMatchConstraint (ssp->name, constraint)) {
11357                   do_remove = TRUE;
11358                 }
11359               } else {
11360                 does_match = TRUE;
11361                 if (!IsStringConstraintEmpty (constraint)) {
11362                   tmp = GetThreeFieldSubfield (ssp->name, subfield);
11363                   does_match = DoesStringMatchConstraint (tmp, constraint);
11364                   tmp = MemFree (tmp);
11365                 }
11366                 if (does_match) {
11367                   rval |= RemoveThreeFieldSubfield (&(ssp->name), subfield);
11368                   if (StringHasNoText (ssp->name)) {
11369                     do_remove = TRUE;
11370                   }
11371                 }
11372               }
11373             }
11374             if (do_remove) {
11375               if (ssp_prev == NULL) {
11376                 biop->subtype = ssp->next;
11377               } else {
11378                 ssp_prev->next = ssp->next;
11379               }
11380               ssp->next = NULL;
11381               ssp = SubSourceFree (ssp);
11382               rval = TRUE;
11383             } else {
11384               ssp_prev = ssp;
11385             }
11386             ssp = ssp_next;
11387           }
11388         } else {
11389           if (biop->org != NULL && biop->org->orgname != NULL) {
11390             mod = biop->org->orgname->mod;
11391             while (mod != NULL) {
11392               mod_next = mod->next;
11393               do_remove = FALSE;
11394               if (mod->subtype == orgmod_subtype) {
11395                 if (subfield == 0) {
11396                   if (DoesStringMatchConstraint (mod->subname, constraint)) {
11397                     do_remove = TRUE;
11398                   }
11399                 } else {
11400                   does_match = TRUE;
11401                   if (!IsStringConstraintEmpty (constraint)) {
11402                     tmp = GetThreeFieldSubfield (mod->subname, subfield);
11403                     does_match = DoesStringMatchConstraint (tmp, constraint);
11404                     tmp = MemFree (tmp);
11405                   }
11406                   if (does_match) {
11407                     rval |= RemoveThreeFieldSubfield (&(mod->subname), subfield);
11408                   }
11409                   if (StringHasNoText (mod->subname)) {
11410                     do_remove = TRUE;
11411                   }
11412                 }
11413               }
11414               if (do_remove) {
11415                 if (mod_prev == NULL) {
11416                   biop->org->orgname->mod = mod->next;
11417                 } else {
11418                   mod_prev->next = mod->next;
11419                 }
11420                 mod->next = NULL;
11421                 mod = OrgModFree (mod);
11422                 rval = TRUE;
11423               } else {
11424                 mod_prev = mod;
11425               }
11426               mod = mod_next;
11427             }
11428           }
11429         }
11430       }
11431       break;
11432     case SourceQualChoice_location:
11433       str = LocNameFromGenome (biop->genome);
11434       if (DoesStringMatchConstraint (str, constraint)) {
11435         if (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue)) {
11436           biop->genome = 0;
11437           rval = TRUE;
11438         }
11439       }
11440       break;
11441     case SourceQualChoice_origin:
11442       str = OriginNameFromOrigin (biop->origin);
11443       if (DoesStringMatchConstraint (str, constraint)) {
11444         if (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue)) {
11445           biop->origin = 0;
11446           rval = TRUE;
11447         }
11448       }
11449       break;
11450     case SourceQualChoice_gcode:
11451       if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) {
11452         biop->org->orgname->gcode = 0;
11453         rval = TRUE;
11454       }
11455       break;
11456     case SourceQualChoice_mgcode:
11457       if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) {
11458         biop->org->orgname->mgcode = 0;
11459         rval = TRUE;
11460       }
11461       break;
11462   }
11463   return rval;
11464 }
11465 
11466 
SetSourceQualInBioSource(BioSourcePtr biop,SourceQualChoicePtr scp,StringConstraintPtr constraint,CharPtr value,Uint2 existing_text)11467 NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
11468 {
11469   SubSourcePtr ssp, ssp_prev = NULL, ssp_next;
11470   OrgModPtr mod, mod_prev = NULL, mod_next;
11471   Int4 orgmod_subtype = -1, subsrc_subtype = -1, subfield;
11472   CharPtr str, tmp;
11473   Boolean rval = FALSE, found = FALSE, does_match;
11474   ValNode vn;
11475 
11476   if (biop == NULL || scp == NULL) return FALSE;
11477 
11478   switch (scp->choice)
11479   {
11480     case SourceQualChoice_textqual:
11481       if (scp->data.intvalue == Source_qual_taxname) {
11482         if ((biop->org == NULL && IsStringConstraintEmpty (constraint))
11483             || (biop->org != NULL
11484                 && DoesStringMatchConstraint (biop->org->taxname, constraint))) {
11485           if (biop->org == NULL) {
11486             biop->org = OrgRefNew();
11487           }
11488           rval = SetStringValue (&(biop->org->taxname), value, existing_text);
11489           if (rval) {
11490             RemoveTaxRef (biop->org);
11491             Lcl_RemoveOldName (biop->org);
11492           }
11493         }
11494       } else if (scp->data.intvalue == Source_qual_common_name) {
11495         if ((biop->org == NULL && IsStringConstraintEmpty (constraint))
11496             || (biop->org != NULL
11497                 && DoesStringMatchConstraint (biop->org->common, constraint))) {
11498           if (biop->org == NULL) {
11499             biop->org = OrgRefNew();
11500           }
11501           rval = SetStringValue (&(biop->org->common), value, existing_text);
11502         }
11503       } else if (scp->data.intvalue == Source_qual_lineage) {
11504         if ((biop->org == NULL && IsStringConstraintEmpty (constraint))
11505             ||(biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint))
11506             ||(biop->org != NULL && biop->org->orgname != NULL
11507                && DoesStringMatchConstraint (biop->org->orgname->lineage, constraint))) {
11508           if (biop->org == NULL) {
11509             biop->org = OrgRefNew();
11510           }
11511           if (biop->org->orgname == NULL) {
11512             biop->org->orgname = OrgNameNew ();
11513           }
11514           rval = SetStringValue (&(biop->org->orgname->lineage), value, existing_text);
11515         }
11516       } else if (scp->data.intvalue == Source_qual_division) {
11517         if ((biop->org == NULL && IsStringConstraintEmpty (constraint))
11518             || (biop->org != NULL && biop->org->orgname == NULL && IsStringConstraintEmpty (constraint))
11519             || (biop->org != NULL && biop->org->orgname != NULL
11520                 && DoesStringMatchConstraint (biop->org->orgname->div, constraint))) {
11521           if (biop->org == NULL) {
11522             biop->org = OrgRefNew();
11523           }
11524           if (biop->org->orgname == NULL) {
11525             biop->org->orgname = OrgNameNew ();
11526           }
11527           rval = SetStringValue (&(biop->org->orgname->div), value, existing_text);
11528         }
11529       } else if (scp->data.intvalue == Source_qual_dbxref) {
11530         if (biop->org == NULL) {
11531           biop->org = OrgRefNew ();
11532         }
11533         rval = SetDbxrefString (&(biop->org->db), constraint, value, existing_text);
11534       } else if (scp->data.intvalue == Source_qual_taxid) {
11535         rval = SetTaxonomyId(biop, constraint, value, existing_text);
11536       } else if (scp->data.intvalue == Source_qual_all_notes) {
11537         vn.choice = SourceQualChoice_textqual;
11538         vn.data.intvalue = Source_qual_subsource_note;
11539         vn.next = NULL;
11540         rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
11541         vn.data.intvalue = Source_qual_orgmod_note;
11542         rval |= SetSourceQualInBioSource (biop, &vn, constraint, value, existing_text);
11543       } else if (scp->data.intvalue == Source_qual_all_primers) {
11544         rval = SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_name, constraint, value, existing_text);
11545         rval |= SetPrimerValueInBioSource (biop, Source_qual_fwd_primer_seq, constraint, value, existing_text);
11546         rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_name, constraint, value, existing_text);
11547         rval |= SetPrimerValueInBioSource (biop, Source_qual_rev_primer_seq, constraint, value, existing_text);
11548       } else if (scp->data.intvalue == Source_qual_fwd_primer_name
11549                    || scp->data.intvalue == Source_qual_fwd_primer_seq
11550                    || scp->data.intvalue == Source_qual_rev_primer_name
11551                    || scp->data.intvalue == Source_qual_rev_primer_seq) {
11552         /* remove from new primer object */
11553         rval = SetPrimerValueInBioSource (biop, scp->data.intvalue, constraint, value, existing_text);
11554       } else if (scp->data.intvalue == Source_qual_all_quals) {
11555         /* will not do this */
11556       } else {
11557         orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue, &subfield);
11558         if (orgmod_subtype == -1) {
11559           subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue, &subfield);
11560           if (subsrc_subtype > -1) {
11561             if (existing_text == ExistingTextOption_add_qual) {
11562               /* create new subsource */
11563               ssp = SubSourceNew ();
11564               ssp->subtype = subsrc_subtype;
11565               rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text);
11566               /* find last in current list */
11567               ssp_prev = biop->subtype;
11568               while (ssp_prev != NULL && ssp_prev->next != NULL) {
11569                 ssp_prev = ssp_prev->next;
11570               }
11571 
11572               /* add to end of list */
11573               if (ssp_prev == NULL) {
11574                 biop->subtype = ssp;
11575               } else {
11576                 ssp_prev->next = ssp;
11577               }
11578             } else {
11579               ssp = biop->subtype;
11580               while (ssp != NULL) {
11581                 ssp_next = ssp->next;
11582                 if (ssp->subtype == subsrc_subtype) {
11583                   if (subfield == 0) {
11584                     if (DoesStringMatchConstraint (ssp->name, constraint)) {
11585                       rval = SetStringValue (&(ssp->name), value, existing_text);
11586                       found = TRUE;
11587                     }
11588                   } else {
11589                     does_match = TRUE;
11590                     if (!IsStringConstraintEmpty (constraint)) {
11591                       tmp = GetThreeFieldSubfield (ssp->name, subfield);
11592                       does_match = DoesStringMatchConstraint (tmp, constraint);
11593                     }
11594                     if (does_match) {
11595                       rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text);
11596                       found = TRUE;
11597                     }
11598                   }
11599                   if (rval && StringHasNoText (ssp->name) && !IsNonTextSourceQual(scp->data.intvalue)) {
11600                     if (ssp_prev == NULL) {
11601                       biop->subtype = ssp->next;
11602                     } else {
11603                       ssp_prev->next = ssp->next;
11604                     }
11605                     ssp->next = NULL;
11606                     ssp = SubSourceFree (ssp);
11607                   } else {
11608                     ssp_prev = ssp;
11609                   }
11610                 } else {
11611                   ssp_prev = ssp;
11612                 }
11613                 ssp = ssp_next;
11614               }
11615               if (!found && IsStringConstraintEmpty (constraint)) {
11616                 ssp = SubSourceNew ();
11617                 ssp->subtype = subsrc_subtype;
11618                 if (StringHasNoText (value) && IsNonTextSourceQual(scp->data.intvalue)) {
11619                   ssp->name = StringSave ("");
11620                 } else {
11621                   rval = SetThreeFieldSubfield (&(ssp->name), subfield, value, existing_text);
11622                 }
11623                 if (ssp_prev == NULL) {
11624                   biop->subtype = ssp;
11625                 } else {
11626                   ssp_prev->next = ssp;
11627                 }
11628               }
11629             }
11630           }
11631         } else {
11632           if (existing_text == ExistingTextOption_add_qual) {
11633             if (biop->org == NULL) {
11634               biop->org = OrgRefNew();
11635             }
11636             if (biop->org->orgname == NULL) {
11637               biop->org->orgname = OrgNameNew();
11638             }
11639             /* create new orgmod */
11640             mod = OrgModNew ();
11641             mod->subtype = orgmod_subtype;
11642             rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text);
11643             /* find last in current list */
11644             mod_prev = biop->org->orgname->mod;
11645             while (mod_prev != NULL && mod_prev->next != NULL) {
11646               mod_prev = mod_prev->next;
11647             }
11648             /* add to end of list */
11649             if (mod_prev == NULL) {
11650               biop->org->orgname->mod = mod;
11651             } else {
11652               mod_prev->next = mod;
11653             }
11654           } else {
11655             if (biop->org != NULL && biop->org->orgname != NULL) {
11656               mod = biop->org->orgname->mod;
11657               while (mod != NULL) {
11658                 mod_next = mod->next;
11659                 if (mod->subtype == orgmod_subtype) {
11660                   if (subfield == 0) {
11661                     if (DoesStringMatchConstraint (mod->subname, constraint)) {
11662                       rval = SetStringValue (&(mod->subname), value, existing_text);
11663                       found = TRUE;
11664                     }
11665                   } else {
11666                     does_match = TRUE;
11667                     if (!IsStringConstraintEmpty (constraint)) {
11668                       tmp = GetThreeFieldSubfield (mod->subname, subfield);
11669                       does_match = DoesStringMatchConstraint (tmp, constraint);
11670                       tmp = MemFree (tmp);
11671                     }
11672                     if (does_match) {
11673                       rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text);
11674                       found = TRUE;
11675                     }
11676                   }
11677                   if (rval && StringHasNoText (mod->subname) && !IsNonTextSourceQual(scp->data.intvalue)) {
11678                     if (mod_prev == NULL) {
11679                       biop->org->orgname->mod = mod->next;
11680                     } else {
11681                       mod_prev->next = mod->next;
11682                     }
11683                     mod->next = NULL;
11684                     mod = OrgModFree (mod);
11685                   } else {
11686                     mod_prev = mod;
11687                   }
11688                 } else {
11689                   mod_prev = mod;
11690                 }
11691                 mod = mod_next;
11692               }
11693             }
11694             if (!found && IsStringConstraintEmpty (constraint)) {
11695               if (biop->org == NULL) {
11696                 biop->org = OrgRefNew();
11697               }
11698               if (biop->org->orgname == NULL) {
11699                 biop->org->orgname = OrgNameNew();
11700               }
11701               mod = OrgModNew ();
11702               mod->subtype = orgmod_subtype;
11703               rval = SetThreeFieldSubfield (&(mod->subname), subfield, value, existing_text);
11704               if (mod_prev == NULL) {
11705                 biop->org->orgname->mod = mod;
11706               } else {
11707                 mod_prev->next = mod;
11708               }
11709             }
11710           }
11711         }
11712       }
11713       break;
11714     case SourceQualChoice_location:
11715       str = LocNameFromGenome (biop->genome);
11716       if (DoesStringMatchConstraint (str, constraint)) {
11717         biop->genome = GenomeFromSrcLoc (scp->data.intvalue);
11718         rval = TRUE;
11719       }
11720       break;
11721     case SourceQualChoice_origin:
11722       str = OriginNameFromOrigin (biop->origin);
11723       if (DoesStringMatchConstraint (str, constraint)) {
11724         biop->origin = OriginFromSrcOrig(scp->data.intvalue);
11725         rval = TRUE;
11726       }
11727       break;
11728     case SourceQualChoice_gcode:
11729       if (biop->org == NULL) {
11730         biop->org = OrgRefNew();
11731       }
11732       if (biop->org->orgname == NULL) {
11733         biop->org->orgname = OrgNameNew();
11734       }
11735       biop->org->orgname->gcode = scp->data.intvalue;
11736       rval = TRUE;
11737       break;
11738     case SourceQualChoice_mgcode:
11739       if (biop->org == NULL) {
11740         biop->org = OrgRefNew();
11741       }
11742       if (biop->org->orgname == NULL) {
11743         biop->org->orgname = OrgNameNew();
11744       }
11745       biop->org->orgname->mgcode = scp->data.intvalue;
11746       rval = TRUE;
11747       break;
11748   }
11749   return rval;
11750 }
11751 
11752 
GetRepresentativeBioseqFromBioseqSet(BioseqSetPtr bssp)11753 NLM_EXTERN BioseqPtr GetRepresentativeBioseqFromBioseqSet (BioseqSetPtr bssp)
11754 {
11755   SeqEntryPtr sep;
11756   BioseqPtr   bsp = NULL;
11757 
11758   if (bssp == NULL || (bssp->_class != BioseqseqSet_class_segset && bssp->_class != BioseqseqSet_class_nuc_prot)) {
11759     return NULL;
11760   }
11761   sep = bssp->seq_set;
11762   if (sep->data.ptrvalue == NULL) {
11763     bsp = NULL;
11764   } else if (IS_Bioseq(sep)) {
11765     bsp = sep->data.ptrvalue;
11766   } else if (IS_Bioseq_set (sep)) {
11767     bsp = GetRepresentativeBioseqFromBioseqSet (sep->data.ptrvalue);
11768   }
11769   return bsp;
11770 }
11771 
11772 
GetSequenceForObject(Uint1 choice,Pointer data)11773 NLM_EXTERN BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data)
11774 {
11775   BioseqPtr bsp = NULL;
11776   SeqFeatPtr sfp;
11777   SeqDescrPtr sdp;
11778   ObjValNodePtr ovp;
11779   CGPSetPtr cgp;
11780   ValNodePtr vnp;
11781 
11782   if (data == NULL) return NULL;
11783 
11784   switch (choice) {
11785     case OBJ_BIOSEQ:
11786       bsp = (BioseqPtr) data;
11787       break;
11788     case OBJ_SEQFEAT:
11789       sfp = (SeqFeatPtr) data;
11790       bsp = BioseqFindFromSeqLoc (sfp->location);
11791       break;
11792     case OBJ_SEQDESC:
11793       sdp = (SeqDescrPtr) data;
11794       if (sdp->extended) {
11795         ovp = (ObjValNodePtr) sdp;
11796         if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != NULL) {
11797           bsp = ovp->idx.parentptr;
11798         } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
11799           bsp = GetRepresentativeBioseqFromBioseqSet (ovp->idx.parentptr);
11800         }
11801       }
11802       break;
11803     case 0:
11804       cgp = (CGPSetPtr) data;
11805       for (vnp = cgp->cds_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
11806         sfp = vnp->data.ptrvalue;
11807         if (sfp != NULL) {
11808           bsp = BioseqFindFromSeqLoc (sfp->location);
11809         }
11810       }
11811       for (vnp = cgp->mrna_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
11812         sfp = vnp->data.ptrvalue;
11813         if (sfp != NULL) {
11814           bsp = BioseqFindFromSeqLoc (sfp->location);
11815         }
11816       }
11817       for (vnp = cgp->gene_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
11818         sfp = vnp->data.ptrvalue;
11819         if (sfp != NULL) {
11820           bsp = BioseqFindFromSeqLoc (sfp->location);
11821         }
11822       }
11823       break;
11824   }
11825   return bsp;
11826 }
11827 
11828 
GetBioSourceFromObject(Uint1 choice,Pointer data)11829 NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data)
11830 {
11831   BioSourcePtr biop = NULL;
11832   SeqDescrPtr  sdp;
11833   SeqFeatPtr   sfp;
11834   BioseqPtr    bsp = NULL;
11835   SeqMgrDescContext context;
11836 
11837   if (data == NULL) return NULL;
11838 
11839   switch (choice)
11840   {
11841     case OBJ_SEQDESC:
11842       sdp = (SeqDescrPtr) data;
11843       if (sdp->choice == Seq_descr_source) {
11844         biop = sdp->data.ptrvalue;
11845       }
11846       break;
11847     case OBJ_SEQFEAT:
11848       sfp = (SeqFeatPtr) data;
11849       if (sfp->data.choice == SEQFEAT_BIOSRC) {
11850         biop = sfp->data.value.ptrvalue;
11851       }
11852       break;
11853   }
11854   if (biop == NULL) {
11855     bsp = GetSequenceForObject (choice, data);
11856     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
11857     if (sdp != NULL && sdp->choice == Seq_descr_source) {
11858       biop = sdp->data.ptrvalue;
11859     }
11860   }
11861   return biop;
11862 }
11863 
11864 
GetEntityIdFromObject(Uint1 choice,Pointer data)11865 NLM_EXTERN Uint2 GetEntityIdFromObject (Uint1 choice, Pointer data)
11866 {
11867   Uint2 entityID = 0;
11868   SeqDescrPtr  sdp;
11869   ObjValNodePtr ovp;
11870   SeqFeatPtr   sfp;
11871   BioseqPtr    bsp;
11872 
11873   if (data == NULL) return 0;
11874 
11875   switch (choice)
11876   {
11877     case OBJ_SEQDESC:
11878       sdp = (SeqDescrPtr) data;
11879       if (sdp->extended) {
11880         ovp = (ObjValNodePtr) sdp;
11881         entityID = ovp->idx.entityID;
11882       }
11883       break;
11884     case OBJ_SEQFEAT:
11885       sfp = (SeqFeatPtr) data;
11886       entityID = sfp->idx.entityID;
11887       break;
11888     default:
11889       bsp = GetSequenceForObject (choice, data);
11890       if (bsp != NULL) {
11891         entityID = bsp->idx.entityID;
11892       }
11893       break;
11894 
11895   }
11896 
11897   return entityID;
11898 }
11899 
11900 
11901 /* functions for dealing with CDS-Gene-Prot sets */
GetFieldValueFromCGPSet(CGPSetPtr c,Uint2 field,StringConstraintPtr scp)11902 static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
11903 {
11904   CharPtr str = NULL;
11905   ValNodePtr vnp;
11906   SeqFeatPtr sfp;
11907   GeneRefPtr grp;
11908   RnaRefPtr  rrp;
11909   ProtRefPtr prp;
11910   FeatureFieldPtr ffield;
11911 
11912   if (c == NULL) return NULL;
11913   switch (field) {
11914     case CDSGeneProt_field_cds_comment:
11915     case CDSGeneProt_field_cds_inference:
11916     case CDSGeneProt_field_codon_start:
11917       ffield = FeatureFieldFromCDSGeneProtField (field);
11918       for (vnp = c->cds_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11919         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11920         str = GetQualFromFeature (sfp, ffield, scp);
11921       }
11922       ffield = FeatureFieldFree (ffield);
11923       break;
11924     case CDSGeneProt_field_gene_locus:
11925     case CDSGeneProt_field_gene_inference:
11926       ffield = FeatureFieldFromCDSGeneProtField (field);
11927       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11928         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11929         str = GetQualFromFeature (sfp, ffield, scp);
11930       }
11931       ffield = FeatureFieldFree (ffield);
11932       break;
11933     case CDSGeneProt_field_gene_description:
11934       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11935         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11936         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
11937             && (grp = sfp->data.value.ptrvalue) != NULL
11938             && !StringHasNoText (grp->desc)
11939             && DoesStringMatchConstraint(grp->desc, scp))
11940         {
11941           str = StringSave (grp->desc);
11942         }
11943       }
11944       break;
11945     case CDSGeneProt_field_gene_comment:
11946       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11947         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11948         if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
11949         {
11950           str = StringSave (sfp->comment);
11951         }
11952       }
11953       break;
11954     case CDSGeneProt_field_gene_allele:
11955       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11956         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11957         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
11958             && (grp = sfp->data.value.ptrvalue) != NULL
11959             && !StringHasNoText (grp->allele)
11960             && DoesStringMatchConstraint(grp->allele, scp))
11961         {
11962           str = StringSave (grp->allele);
11963         }
11964       }
11965       break;
11966     case CDSGeneProt_field_gene_maploc:
11967       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11968         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11969         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
11970             && (grp = sfp->data.value.ptrvalue) != NULL
11971             && !StringHasNoText (grp->maploc)
11972             && DoesStringMatchConstraint(grp->maploc, scp))
11973         {
11974           str = StringSave (grp->maploc);
11975         }
11976       }
11977       break;
11978     case CDSGeneProt_field_gene_locus_tag:
11979       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11980         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11981         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
11982             && (grp = sfp->data.value.ptrvalue) != NULL
11983             && !StringHasNoText (grp->locus_tag)
11984             && DoesStringMatchConstraint(grp->locus_tag, scp))
11985         {
11986           str = StringSave (grp->locus_tag);
11987         }
11988       }
11989       break;
11990     case CDSGeneProt_field_gene_synonym:
11991       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
11992         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11993         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
11994             && (grp = sfp->data.value.ptrvalue) != NULL)
11995         {
11996           str = GetFirstValNodeStringMatch (grp->syn, scp);
11997         }
11998       }
11999       break;
12000     case CDSGeneProt_field_gene_old_locus_tag:
12001       for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12002         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12003         if (sfp != NULL) {
12004           str = GetFirstGBQualMatch (sfp->qual, "old_locus_tag", 0, scp);
12005         }
12006       }
12007       break;
12008     case CDSGeneProt_field_mrna_product:
12009       for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12010         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12011         if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA
12012             && (rrp = sfp->data.value.ptrvalue) != NULL
12013             && rrp->ext.choice == 1
12014             && !StringHasNoText (rrp->ext.value.ptrvalue)
12015             && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp))
12016         {
12017           str = StringSave (rrp->ext.value.ptrvalue);
12018         }
12019       }
12020       break;
12021     case CDSGeneProt_field_mrna_comment:
12022       for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12023         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12024         if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12025         {
12026           str = StringSave (sfp->comment);
12027         }
12028       }
12029       break;
12030     case CDSGeneProt_field_prot_name:
12031       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12032         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12033         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12034             && sfp->idx.subtype == FEATDEF_PROT
12035             && (prp = sfp->data.value.ptrvalue) != NULL)
12036         {
12037           str = GetFirstValNodeStringMatch (prp->name, scp);
12038         }
12039       }
12040       break;
12041     case CDSGeneProt_field_prot_description:
12042       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12043         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12044         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12045             && sfp->idx.subtype == FEATDEF_PROT
12046             && (prp = sfp->data.value.ptrvalue) != NULL
12047             && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
12048           str = StringSave (prp->desc);
12049         }
12050       }
12051       break;
12052     case CDSGeneProt_field_prot_ec_number:
12053       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12054         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12055         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12056             && sfp->idx.subtype == FEATDEF_PROT
12057             && (prp = sfp->data.value.ptrvalue) != NULL)
12058         {
12059           str = GetFirstValNodeStringMatch (prp->ec, scp);
12060         }
12061       }
12062       break;
12063     case CDSGeneProt_field_prot_activity:
12064       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12065         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12066         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12067             && sfp->idx.subtype == FEATDEF_PROT
12068             && (prp = sfp->data.value.ptrvalue) != NULL)
12069         {
12070           str = GetFirstValNodeStringMatch (prp->activity, scp);
12071         }
12072       }
12073       break;
12074     case CDSGeneProt_field_prot_comment:
12075       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12076         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12077         if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT
12078             && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12079         {
12080           str = StringSave (sfp->comment);
12081         }
12082       }
12083       break;
12084     case CDSGeneProt_field_mat_peptide_name:
12085       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12086         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12087         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12088             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12089             && (prp = sfp->data.value.ptrvalue) != NULL)
12090         {
12091           str = GetFirstValNodeStringMatch (prp->name, scp);
12092         }
12093       }
12094       break;
12095     case CDSGeneProt_field_mat_peptide_description:
12096       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12097         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12098         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12099             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12100             && (prp = sfp->data.value.ptrvalue) != NULL
12101             && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
12102           str = StringSave (prp->desc);
12103         }
12104       }
12105       break;
12106     case CDSGeneProt_field_mat_peptide_ec_number:
12107       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12108         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12109         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12110             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12111             && (prp = sfp->data.value.ptrvalue) != NULL)
12112         {
12113           str = GetFirstValNodeStringMatch (prp->ec, scp);
12114         }
12115       }
12116       break;
12117     case CDSGeneProt_field_mat_peptide_activity:
12118       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12119         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12120         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12121             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12122             && (prp = sfp->data.value.ptrvalue) != NULL)
12123         {
12124           str = GetFirstValNodeStringMatch (prp->activity, scp);
12125         }
12126       }
12127       break;
12128     case CDSGeneProt_field_mat_peptide_comment:
12129       for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
12130         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12131         if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12132             && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12133         {
12134           str = StringSave (sfp->comment);
12135         }
12136       }
12137       break;
12138   }
12139   return str;
12140 }
12141 
12142 
RemoveFieldValueFromCGPSet(CGPSetPtr c,Uint2 field,StringConstraintPtr scp)12143 static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
12144 {
12145   Boolean    rval = FALSE;
12146   ValNodePtr vnp;
12147   SeqFeatPtr sfp;
12148   GeneRefPtr grp;
12149   RnaRefPtr  rrp;
12150   ProtRefPtr prp;
12151   FeatureFieldPtr ffield;
12152 
12153   if (c == NULL) return FALSE;
12154   switch (field) {
12155     case CDSGeneProt_field_cds_comment:
12156     case CDSGeneProt_field_cds_inference:
12157     case CDSGeneProt_field_codon_start:
12158       ffield = FeatureFieldFromCDSGeneProtField (field);
12159       for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) {
12160         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12161         rval |= RemoveQualFromFeature (sfp, ffield, scp);
12162       }
12163       ffield = FeatureFieldFree (ffield);
12164       break;
12165     case CDSGeneProt_field_gene_locus:
12166     case CDSGeneProt_field_gene_inference:
12167       ffield = FeatureFieldFromCDSGeneProtField (field);
12168       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12169         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12170         rval |= RemoveQualFromFeature (sfp, ffield, scp);
12171       }
12172       ffield = FeatureFieldFree (ffield);
12173       break;
12174     case CDSGeneProt_field_gene_description:
12175       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12176         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12177         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12178             && (grp = sfp->data.value.ptrvalue) != NULL
12179             && !StringHasNoText (grp->desc)
12180             && DoesStringMatchConstraint(grp->desc, scp))
12181         {
12182           grp->desc = MemFree(grp->desc);
12183           rval = TRUE;
12184         }
12185       }
12186       break;
12187     case CDSGeneProt_field_gene_comment:
12188       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12189         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12190         if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12191         {
12192           sfp->comment = MemFree (sfp->comment);
12193           rval = TRUE;
12194         }
12195       }
12196       break;
12197     case CDSGeneProt_field_gene_allele:
12198       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12199         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12200         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12201             && (grp = sfp->data.value.ptrvalue) != NULL
12202             && !StringHasNoText (grp->allele)
12203             && DoesStringMatchConstraint(grp->allele, scp))
12204         {
12205           grp->allele = MemFree (grp->allele);
12206           rval = TRUE;
12207         }
12208       }
12209       break;
12210     case CDSGeneProt_field_gene_maploc:
12211       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12212         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12213         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12214             && (grp = sfp->data.value.ptrvalue) != NULL
12215             && !StringHasNoText (grp->maploc)
12216             && DoesStringMatchConstraint(grp->maploc, scp))
12217         {
12218           grp->maploc = MemFree (grp->maploc);
12219           rval = TRUE;
12220         }
12221       }
12222       break;
12223     case CDSGeneProt_field_gene_locus_tag:
12224       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12225         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12226         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12227             && (grp = sfp->data.value.ptrvalue) != NULL
12228             && !StringHasNoText (grp->locus_tag)
12229             && DoesStringMatchConstraint(grp->locus_tag, scp))
12230         {
12231           grp->locus_tag = MemFree (grp->locus_tag);
12232           rval = TRUE;
12233         }
12234       }
12235       break;
12236     case CDSGeneProt_field_gene_synonym:
12237       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12238         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12239         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12240             && (grp = sfp->data.value.ptrvalue) != NULL)
12241         {
12242           rval |= RemoveValNodeStringMatch (&(grp->syn), scp);
12243         }
12244       }
12245       break;
12246     case CDSGeneProt_field_gene_old_locus_tag:
12247       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12248         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12249         if (sfp != NULL) {
12250           rval |= RemoveGBQualMatch (&(sfp->qual), "old_locus_tag", 0, scp);
12251         }
12252       }
12253       break;
12254     case CDSGeneProt_field_mrna_product:
12255       for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
12256         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12257         if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA
12258             && (rrp = sfp->data.value.ptrvalue) != NULL
12259             && rrp->ext.choice == 1
12260             && !StringHasNoText (rrp->ext.value.ptrvalue)
12261             && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp))
12262         {
12263           rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12264           rrp->ext.choice = 0;
12265           rval = TRUE;
12266         }
12267       }
12268       break;
12269     case CDSGeneProt_field_mrna_comment:
12270       for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
12271         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12272         if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12273         {
12274           sfp->comment = MemFree (sfp->comment);
12275           rval = TRUE;
12276         }
12277       }
12278       break;
12279     case CDSGeneProt_field_prot_name:
12280       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12281         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12282         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12283             && sfp->idx.subtype == FEATDEF_PROT
12284             && (prp = sfp->data.value.ptrvalue) != NULL)
12285         {
12286           rval |= RemoveValNodeStringMatch (&(prp->name), scp);
12287         }
12288       }
12289       break;
12290     case CDSGeneProt_field_prot_description:
12291       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12292         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12293         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12294             && sfp->idx.subtype == FEATDEF_PROT
12295             && (prp = sfp->data.value.ptrvalue) != NULL
12296             && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
12297           prp->desc = MemFree (prp->desc);
12298           rval = TRUE;
12299         }
12300       }
12301       break;
12302     case CDSGeneProt_field_prot_ec_number:
12303       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12304         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12305         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12306             && sfp->idx.subtype == FEATDEF_PROT
12307             && (prp = sfp->data.value.ptrvalue) != NULL)
12308         {
12309           rval |= RemoveValNodeStringMatch (&(prp->ec), scp);
12310         }
12311       }
12312       break;
12313     case CDSGeneProt_field_prot_activity:
12314       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12315         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12316         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12317             && sfp->idx.subtype == FEATDEF_PROT
12318             && (prp = sfp->data.value.ptrvalue) != NULL)
12319         {
12320           rval |= RemoveValNodeStringMatch (&(prp->activity), scp);
12321         }
12322       }
12323       break;
12324     case CDSGeneProt_field_prot_comment:
12325       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12326         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12327         if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT
12328             && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12329         {
12330           sfp->comment = MemFree (sfp->comment);
12331           rval = TRUE;
12332         }
12333       }
12334       break;
12335     case CDSGeneProt_field_mat_peptide_name:
12336       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12337         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12338         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12339             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12340             && (prp = sfp->data.value.ptrvalue) != NULL)
12341         {
12342           rval |= RemoveValNodeStringMatch (&(prp->name), scp);
12343         }
12344       }
12345       break;
12346     case CDSGeneProt_field_mat_peptide_description:
12347       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12348         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12349         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12350             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12351             && (prp = sfp->data.value.ptrvalue) != NULL
12352             && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
12353           prp->desc = MemFree (prp->desc);
12354           rval = TRUE;
12355         }
12356       }
12357       break;
12358     case CDSGeneProt_field_mat_peptide_ec_number:
12359       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12360         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12361         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12362             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12363             && (prp = sfp->data.value.ptrvalue) != NULL)
12364         {
12365           rval |= RemoveValNodeStringMatch (&(prp->ec), scp);
12366         }
12367       }
12368       break;
12369     case CDSGeneProt_field_mat_peptide_activity:
12370       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12371         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12372         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12373             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12374             && (prp = sfp->data.value.ptrvalue) != NULL)
12375         {
12376           rval |= RemoveValNodeStringMatch (&(prp->activity), scp);
12377         }
12378       }
12379       break;
12380     case CDSGeneProt_field_mat_peptide_comment:
12381       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12382         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12383         if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12384             && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
12385         {
12386           sfp->comment = MemFree (sfp->comment);
12387           rval = TRUE;
12388         }
12389       }
12390       break;
12391   }
12392   return rval;
12393 }
12394 
12395 
CreateGeneForCGPSet(CGPSetPtr c)12396 static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c)
12397 {
12398   SeqFeatPtr gene = NULL, sfp = NULL;
12399   ValNodePtr vnp;
12400 
12401   if (c == NULL) return NULL;
12402 
12403   for (vnp = c->cds_list; vnp != NULL && sfp == NULL; vnp = vnp->next) {
12404     sfp = vnp->data.ptrvalue;
12405   }
12406   for (vnp = c->mrna_list; vnp != NULL && sfp == NULL; vnp = vnp->next) {
12407     sfp = vnp->data.ptrvalue;
12408   }
12409   gene = CreateGeneForFeature (sfp);
12410   return gene;
12411 }
12412 
12413 
SetFieldValueInCGPSet(CGPSetPtr c,Uint2 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)12414 static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
12415 {
12416   Boolean    rval = FALSE;
12417   ValNodePtr vnp;
12418   SeqFeatPtr sfp;
12419   GeneRefPtr grp;
12420   ProtRefPtr prp;
12421   FeatureFieldPtr ffield;
12422 
12423   if (c == NULL) return FALSE;
12424   switch (field) {
12425     case CDSGeneProt_field_cds_comment:
12426     case CDSGeneProt_field_cds_inference:
12427     case CDSGeneProt_field_codon_start:
12428       ffield = FeatureFieldFromCDSGeneProtField (field);
12429       for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) {
12430         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12431         rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text);
12432       }
12433       ffield = FeatureFieldFree (ffield);
12434       break;
12435     case CDSGeneProt_field_gene_locus:
12436       if (c->gene_list == NULL && scp == NULL) {
12437         sfp = CreateGeneForCGPSet (c);
12438         if (sfp != NULL) {
12439           ValNodeAddPointer (&(c->gene_list), 0, sfp);
12440         }
12441       }
12442       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12443         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12444         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12445             && (grp = sfp->data.value.ptrvalue) != NULL
12446             && DoesStringMatchConstraint(grp->locus, scp))
12447         {
12448           rval |= SetStringValue ( &(grp->locus), value, existing_text);
12449         }
12450       }
12451       break;
12452     case CDSGeneProt_field_gene_description:
12453     case CDSGeneProt_field_gene_inference:
12454       ffield = FeatureFieldFromCDSGeneProtField (field);
12455       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12456         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12457         rval |= SetQualOnFeature (sfp, ffield, scp, value, existing_text);
12458       }
12459       ffield = FeatureFieldFree (ffield);
12460       break;
12461     case CDSGeneProt_field_gene_comment:
12462       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12463         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12464         if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp))
12465         {
12466           rval |= SetStringValue ( &(sfp->comment), value, existing_text);
12467         }
12468       }
12469       break;
12470     case CDSGeneProt_field_gene_allele:
12471       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12472         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12473         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12474             && (grp = sfp->data.value.ptrvalue) != NULL
12475             && DoesStringMatchConstraint(grp->allele, scp))
12476         {
12477           rval |= SetStringValue (&(grp->allele), value, existing_text);
12478         }
12479       }
12480       break;
12481     case CDSGeneProt_field_gene_maploc:
12482       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12483         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12484         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12485             && (grp = sfp->data.value.ptrvalue) != NULL
12486             && DoesStringMatchConstraint(grp->maploc, scp))
12487         {
12488           rval |= SetStringValue ( &(grp->maploc), value, existing_text);
12489         }
12490       }
12491       break;
12492     case CDSGeneProt_field_gene_locus_tag:
12493       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12494         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12495         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12496             && (grp = sfp->data.value.ptrvalue) != NULL
12497             && DoesStringMatchConstraint(grp->locus_tag, scp))
12498         {
12499           rval |= SetStringValue ( &(grp->locus_tag), value, existing_text);
12500         }
12501       }
12502       break;
12503     case CDSGeneProt_field_gene_synonym:
12504       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12505         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12506         if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
12507             && (grp = sfp->data.value.ptrvalue) != NULL)
12508         {
12509           rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
12510         }
12511       }
12512       break;
12513     case CDSGeneProt_field_gene_old_locus_tag:
12514       ffield = FeatureFieldFromCDSGeneProtField (field);
12515       for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
12516         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12517         if (sfp != NULL) {
12518           rval |= SetStringInGBQualList (&(sfp->qual), ffield->field, scp, value, existing_text);
12519         }
12520       }
12521       ffield = FeatureFieldFree (ffield);
12522       break;
12523     case CDSGeneProt_field_mrna_product:
12524       for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
12525         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12526         rval |= SetRNAProductString (sfp, scp, value, existing_text);
12527       }
12528       break;
12529     case CDSGeneProt_field_mrna_comment:
12530       for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
12531         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12532         if (sfp != NULL&& DoesStringMatchConstraint(sfp->comment, scp))
12533         {
12534           rval |= SetStringValue ( &(sfp->comment), value, existing_text);
12535         }
12536       }
12537       break;
12538     case CDSGeneProt_field_prot_name:
12539       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12540         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12541         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12542             && sfp->idx.subtype == FEATDEF_PROT
12543             && (prp = sfp->data.value.ptrvalue) != NULL)
12544         {
12545           rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
12546         }
12547       }
12548       break;
12549     case CDSGeneProt_field_prot_description:
12550       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12551         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12552         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12553             && sfp->idx.subtype == FEATDEF_PROT
12554             && (prp = sfp->data.value.ptrvalue) != NULL
12555             && DoesStringMatchConstraint(prp->desc, scp)) {
12556           rval |= SetStringValue ( &(prp->desc), value, existing_text);
12557         }
12558       }
12559       break;
12560     case CDSGeneProt_field_prot_ec_number:
12561       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12562         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12563         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12564             && sfp->idx.subtype == FEATDEF_PROT
12565             && (prp = sfp->data.value.ptrvalue) != NULL)
12566         {
12567           rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
12568         }
12569       }
12570       break;
12571     case CDSGeneProt_field_prot_activity:
12572       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12573         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12574         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12575             && sfp->idx.subtype == FEATDEF_PROT
12576             && (prp = sfp->data.value.ptrvalue) != NULL)
12577         {
12578           rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
12579         }
12580       }
12581       break;
12582     case CDSGeneProt_field_prot_comment:
12583       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12584         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12585         if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT
12586             && DoesStringMatchConstraint(sfp->comment, scp))
12587         {
12588           rval |= SetStringValue ( &(sfp->comment), value, existing_text);
12589         }
12590       }
12591       break;
12592     case CDSGeneProt_field_mat_peptide_name:
12593       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12594         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12595         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12596             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12597             && (prp = sfp->data.value.ptrvalue) != NULL)
12598         {
12599           rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
12600         }
12601       }
12602       break;
12603     case CDSGeneProt_field_mat_peptide_description:
12604       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12605         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12606         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12607             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12608             && (prp = sfp->data.value.ptrvalue) != NULL
12609             && DoesStringMatchConstraint(prp->desc, scp)) {
12610           rval |= SetStringValue ( &(prp->desc), value, existing_text);
12611         }
12612       }
12613       break;
12614     case CDSGeneProt_field_mat_peptide_ec_number:
12615       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12616         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12617         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12618             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12619             && (prp = sfp->data.value.ptrvalue) != NULL)
12620         {
12621           rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
12622         }
12623       }
12624       break;
12625     case CDSGeneProt_field_mat_peptide_activity:
12626       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12627         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12628         if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
12629             && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12630             && (prp = sfp->data.value.ptrvalue) != NULL)
12631         {
12632           rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
12633         }
12634       }
12635       break;
12636     case CDSGeneProt_field_mat_peptide_comment:
12637       for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12638         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12639         if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa
12640             && DoesStringMatchConstraint(sfp->comment, scp))
12641         {
12642           rval |= SetStringValue ( &(sfp->comment), value, existing_text);
12643         }
12644       }
12645       break;
12646   }
12647   return rval;
12648 }
12649 
12650 
SortFieldInCGPSet(CGPSetPtr c,Uint2 field,Uint2 order)12651 static Boolean SortFieldInCGPSet (CGPSetPtr c, Uint2 field, Uint2 order)
12652 {
12653   ValNodePtr vnp;
12654   SeqFeatPtr sfp;
12655   Boolean    rval = FALSE;
12656 
12657   if (c == NULL) {
12658     return FALSE;
12659   }
12660   if (field == CDSGeneProt_field_prot_name) {
12661     for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
12662       sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12663       if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT) {
12664         rval |= SortProtNames (sfp, order);
12665       }
12666     }
12667   }
12668   return rval;
12669 }
12670 
12671 
GetMolInfoForBioseq(BioseqPtr bsp)12672 static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp)
12673 {
12674   MolInfoPtr m = NULL;
12675   SeqDescrPtr sdp;
12676 
12677   if (bsp == NULL) return NULL;
12678   sdp = bsp->descr;
12679   while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
12680     sdp = sdp->next;
12681   }
12682   if (sdp != NULL) {
12683     m = (MolInfoPtr) sdp->data.ptrvalue;
12684   }
12685   return m;
12686 }
12687 
12688 
GetSequenceQualFromBioseq(BioseqPtr bsp,ValNodePtr field)12689 static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
12690 {
12691   CharPtr rval = NULL;
12692   MolInfoPtr m;
12693 
12694   if (bsp == NULL || field == NULL) return NULL;
12695 
12696   switch (field->choice) {
12697     case MolinfoField_molecule:
12698       m = GetMolInfoForBioseq (bsp);
12699       if (m != NULL) {
12700         rval = BiomolNameFromBiomol (m->biomol);
12701       }
12702       break;
12703     case MolinfoField_technique:
12704       m = GetMolInfoForBioseq (bsp);
12705       if (m != NULL) {
12706         rval = TechNameFromTech (m->tech);
12707       }
12708       break;
12709     case MolinfoField_completedness:
12710       m = GetMolInfoForBioseq (bsp);
12711       if (m != NULL) {
12712         rval = CompletenessNameFromCompleteness (m->completeness);
12713       }
12714       break;
12715     case MolinfoField_mol_class:
12716       rval = MolNameFromMol (bsp->mol);
12717       break;
12718     case MolinfoField_topology:
12719       rval = TopologyNameFromTopology (bsp->topology);
12720       break;
12721     case MolinfoField_strand:
12722       rval = StrandNameFromStrand (bsp->strand);
12723       break;
12724   }
12725   if (rval != NULL) rval = StringSave (rval);
12726   return rval;
12727 }
12728 
12729 
RemoveSequenceQualFromBioseq(BioseqPtr bsp,ValNodePtr field)12730 static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
12731 {
12732   MolInfoPtr m;
12733   Boolean    rval = FALSE;
12734 
12735   if (bsp == NULL || field == NULL) return FALSE;
12736 
12737   switch (field->choice) {
12738     case MolinfoField_molecule:
12739       m = GetMolInfoForBioseq (bsp);
12740       if (m != NULL) {
12741         m->biomol = 0;
12742         rval = TRUE;
12743       }
12744       break;
12745     case MolinfoField_technique:
12746       m = GetMolInfoForBioseq (bsp);
12747       if (m != NULL) {
12748         m->tech = 0;
12749         rval = TRUE;
12750       }
12751       break;
12752     case MolinfoField_completedness:
12753       m = GetMolInfoForBioseq (bsp);
12754       if (m != NULL) {
12755         m->completeness = 0;
12756         rval = TRUE;
12757       }
12758       break;
12759     case MolinfoField_mol_class:
12760       bsp->mol = 0;
12761       rval = TRUE;
12762       break;
12763     case MolinfoField_topology:
12764       bsp->topology = 0;
12765       rval = TRUE;
12766       break;
12767     case MolinfoField_strand:
12768       bsp->strand = 0;
12769       rval = TRUE;
12770       break;
12771   }
12772   return rval;
12773 }
12774 
12775 
AddMolInfoToBioseq(BioseqPtr bsp)12776 static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp)
12777 {
12778   SeqDescrPtr sdp;
12779   MolInfoPtr  m;
12780 
12781   sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo);
12782   m = MolInfoNew ();
12783   sdp->data.ptrvalue = m;
12784   return m;
12785 }
12786 
12787 
SetSequenceQualOnBioseq(BioseqPtr bsp,ValNodePtr field)12788 static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field)
12789 {
12790   MolInfoPtr m = NULL;
12791   Boolean    rval = FALSE;
12792   Int4       new_val;
12793 
12794   if (bsp == NULL || field == NULL) return FALSE;
12795 
12796   switch (field->choice) {
12797     case MolinfoField_molecule:
12798       if (m == NULL) {
12799         m = GetMolInfoForBioseq (bsp);
12800         if (m == NULL) {
12801           m = AddMolInfoToBioseq (bsp);
12802           rval = TRUE;
12803         }
12804       }
12805       new_val = BiomolFromMoleculeType (field->data.intvalue);
12806       if (m->biomol != new_val) {
12807         m->biomol = new_val;
12808         rval = TRUE;
12809       }
12810       break;
12811     case MolinfoField_technique:
12812       if (m == NULL) {
12813         m = GetMolInfoForBioseq (bsp);
12814         if (m == NULL) {
12815           m = AddMolInfoToBioseq (bsp);
12816         }
12817       }
12818       new_val = TechFromTechniqueType (field->data.intvalue);
12819       if (m->tech != new_val) {
12820         m->tech = new_val;
12821         rval = TRUE;
12822       }
12823       break;
12824     case MolinfoField_completedness:
12825       if (m == NULL) {
12826         m = GetMolInfoForBioseq (bsp);
12827         if (m == NULL) {
12828           m = AddMolInfoToBioseq (bsp);
12829         }
12830       }
12831       new_val = CompletenessFromCompletednessType (field->data.intvalue);
12832       if (m->completeness != new_val) {
12833         m->completeness = new_val;
12834         rval = TRUE;
12835       }
12836       break;
12837     case MolinfoField_mol_class:
12838       new_val = MolFromMoleculeClassType (field->data.intvalue);
12839       if (bsp->mol != new_val) {
12840         bsp->mol = new_val;
12841         rval = TRUE;
12842       }
12843       break;
12844     case MolinfoField_topology:
12845       new_val = TopologyFromTopologyType (field->data.intvalue);
12846       if (bsp->topology != new_val) {
12847         bsp->topology = new_val;
12848         rval = TRUE;
12849       }
12850       break;
12851     case MolinfoField_strand:
12852       new_val = StrandFromStrandType (field->data.intvalue);
12853       if (bsp->strand != new_val) {
12854         bsp->strand = new_val;
12855         rval = TRUE;
12856       }
12857       break;
12858   }
12859   return rval;
12860 }
12861 
12862 
GetGenomeProjectIdFromBioseq(BioseqPtr bsp,StringConstraintPtr scp)12863 static CharPtr GetGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
12864 {
12865   SeqDescrPtr       sdp;
12866   SeqMgrDescContext context;
12867   Char              buf[50];
12868   UserObjectPtr     uop;
12869   UserFieldPtr      ufp;
12870 
12871   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
12872   while (sdp != NULL) {
12873     uop = (UserObjectPtr) sdp->data.ptrvalue;
12874     if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0)
12875     {
12876       ufp = uop->data;
12877       while (ufp != NULL) {
12878         if (ufp->label != NULL
12879             && StringCmp (ufp->label->str, "ProjectID") == 0
12880             && ufp->choice == 2) {
12881           sprintf (buf, "%d", ufp->data.intvalue);
12882           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
12883             return StringSave (buf);
12884           }
12885         }
12886         ufp = ufp->next;
12887       }
12888     }
12889     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
12890   }
12891 
12892   return NULL;
12893 }
12894 
12895 
RemoveGenomeProjectIdFromBioseq(BioseqPtr bsp,StringConstraintPtr scp)12896 static Boolean RemoveGenomeProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
12897 {
12898   SeqDescrPtr       sdp;
12899   SeqMgrDescContext context;
12900   Char              buf[50];
12901   UserObjectPtr     uop;
12902   UserFieldPtr      ufp;
12903   ObjValNodePtr     ovn;
12904   Boolean           rval = FALSE;
12905 
12906   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
12907   while (sdp != NULL) {
12908     uop = (UserObjectPtr) sdp->data.ptrvalue;
12909     if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0)
12910     {
12911       ufp = uop->data;
12912       while (ufp != NULL) {
12913         if (ufp->label != NULL
12914             && StringCmp (ufp->label->str, "ProjectID") == 0
12915             && ufp->choice == 2) {
12916           sprintf (buf, "%d", ufp->data.intvalue);
12917           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
12918             if (sdp->extended != 0) {
12919               ovn = (ObjValNodePtr) sdp;
12920               ovn->idx.deleteme = TRUE;
12921               rval = TRUE;
12922             }
12923           }
12924         }
12925         ufp = ufp->next;
12926       }
12927     }
12928     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
12929   }
12930   return rval;
12931 }
12932 
12933 
SetGenomeProjectIdOnBioseq(BioseqPtr bsp,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)12934 static Boolean SetGenomeProjectIdOnBioseq (BioseqPtr bsp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
12935 {
12936   SeqDescrPtr       sdp;
12937   SeqMgrDescContext context;
12938   Char              buf[50];
12939   CharPtr           tmp;
12940   UserObjectPtr     uop;
12941   UserFieldPtr      ufp;
12942   Boolean           rval = FALSE;
12943 
12944   if (bsp == NULL || !StringIsAllDigits (value)) {
12945     return FALSE;
12946   }
12947   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
12948   while (sdp != NULL) {
12949     uop = (UserObjectPtr) sdp->data.ptrvalue;
12950     if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "GenomeProjectsDB") == 0)
12951     {
12952       ufp = uop->data;
12953       while (ufp != NULL) {
12954         if (ufp->label != NULL
12955             && StringCmp (ufp->label->str, "ProjectID") == 0
12956             && ufp->choice == 2) {
12957           sprintf (buf, "%d", ufp->data.intvalue);
12958           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
12959             tmp = StringSave (buf);
12960             if (SetStringValue (&tmp, value, existing_text) && StringIsAllDigits (tmp)) {
12961               ufp->data.intvalue = atoi (tmp);
12962               rval = TRUE;
12963             }
12964             tmp = MemFree (tmp);
12965           }
12966         }
12967         ufp = ufp->next;
12968       }
12969     }
12970     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
12971   }
12972   if (!rval && IsStringConstraintEmpty (scp)) {
12973     sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
12974     uop = CreateGenomeProjectsDBUserObject ();
12975     AddIDsToGenomeProjectsDBUserObject (uop, atoi (value), 0);
12976     sdp->data.ptrvalue = uop;
12977     rval = TRUE;
12978   }
12979   return rval;
12980 }
12981 
12982 
GetBioProjectIdFromBioseq(BioseqPtr bsp,StringConstraintPtr scp)12983 NLM_EXTERN CharPtr GetBioProjectIdFromBioseq (BioseqPtr bsp, StringConstraintPtr scp)
12984 {
12985   SeqDescrPtr       sdp;
12986   SeqMgrDescContext context;
12987   CharPtr           val;
12988   UserObjectPtr     uop;
12989   UserFieldPtr      ufp;
12990   CharPtr PNTR      cpp;
12991   Int4              i;
12992   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
12993   while (sdp != NULL)
12994   {
12995     uop = (UserObjectPtr) sdp->data.ptrvalue;
12996     if (uop != NULL && uop->type != NULL && StringCmp (uop->type->str, "DBLink") == 0)
12997     {
12998       ufp = uop->data;
12999       while (ufp != NULL) {
13000             if (ufp->label != NULL && StringCmp (ufp->label->str, "BioProject") == 0)
13001         {
13002           if (ufp->choice == 1)
13003           {
13004             val = ufp->data.ptrvalue;
13005             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp))
13006             {
13007               return StringSave (val);
13008             }
13009                 }
13010           else if (ufp->choice == 7 && ufp->num > 0 && (cpp = (CharPtr PNTR) ufp->data.ptrvalue) != NULL)
13011           {
13012               for (i = 0; i < ufp->num; i++)
13013             {
13014                   if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp))
13015               {
13016                     return StringSave (cpp[i]);
13017                   }
13018             }
13019           }
13020         }
13021         ufp = ufp->next;
13022       }
13023     }
13024     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
13025   }
13026   return NULL;
13027 }
13028 
13029 
SetTextDescriptor(SeqDescrPtr sdp,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)13030 static Boolean SetTextDescriptor (SeqDescrPtr sdp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
13031 {
13032   Boolean rval = FALSE;
13033   CharPtr cp;
13034   ObjValNodePtr ovp;
13035   Boolean was_empty;
13036 
13037   if (sdp == NULL) {
13038     return FALSE;
13039   }
13040 
13041   if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) {
13042     if (StringHasNoText (sdp->data.ptrvalue)) {
13043       was_empty = TRUE;
13044     } else {
13045       was_empty = FALSE;
13046     }
13047     cp = sdp->data.ptrvalue;
13048     if (SetStringValue (&cp, value, existing_text)) {
13049       rval = TRUE;
13050     }
13051     sdp->data.ptrvalue = cp;
13052     if (was_empty) {
13053       ovp = (ObjValNodePtr) sdp;
13054       ovp->idx.deleteme = FALSE;
13055     }
13056   }
13057 
13058   return rval;
13059 }
13060 
13061 
/* Tests whether str ends with end (case-sensitive, via StringCmp).
 *
 * Returns a pointer into str at the position where the trailing occurrence
 * of end starts, or NULL when str does not end with end, end is longer
 * than str, or either argument is NULL.  An empty end matches at the
 * terminating NUL of str.
 */
static CharPtr s_StringEndsWith (CharPtr str, CharPtr end)
{
  Int4    len_str, len_end;
  CharPtr tail;

  if (str == NULL || end == NULL) {
    return NULL;
  }

  len_str = StringLen (str);
  len_end = StringLen (end);
  if (len_end > len_str) {
    return NULL;
  }

  /* the only place a suffix of this length can start */
  tail = str + (len_str - len_end);
  return (StringCmp (tail, end) == 0) ? tail : NULL;
}
13079 
13080 
/* Extracts the database name from a structured comment prefix or suffix
 * value.
 *
 * A leading "##" is skipped, the remainder is copied with StringSave, and
 * the first of the trailing markers "-START##", "START##", "-END##",
 * "END##" found at the end of the copy is cut off.  The dashed forms are
 * tried before the undashed ones so that the dash is removed as well.
 *
 * Returns the newly allocated name (to be freed by the caller), or NULL
 * when val is NULL.
 */
static CharPtr DbnameValFromPrefixOrSuffix (CharPtr val)
{
  /* tried in this order; the first hit wins */
  static CharPtr trailing_markers[] = {
    "-START##",
    "START##",
    "-END##",
    "END##"
  };
  CharPtr rval = NULL, stop = NULL;
  Int4    i;

  if (val == NULL) {
    return NULL;
  }

  if (StringNCmp (val, "##", 2) == 0) {
    val += 2;
  }
  rval = StringSave (val);
  for (i = 0; i < (Int4) (sizeof (trailing_markers) / sizeof (trailing_markers[0])) && stop == NULL; i++) {
    stop = s_StringEndsWith (rval, trailing_markers[i]);
  }
  if (stop != NULL) {
    /* truncate the copy where the marker starts */
    *stop = 0;
  }
  return rval;
}
13102 
13103 
IsUserFieldStructuredCommentPrefixOrSuffix(UserFieldPtr ufp)13104 NLM_EXTERN Boolean IsUserFieldStructuredCommentPrefixOrSuffix (UserFieldPtr ufp)
13105 {
13106   if (ufp == NULL || ufp->label == NULL) {
13107     return FALSE;
13108   } else if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0
13109     || StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
13110     return TRUE;
13111   } else {
13112     return FALSE;
13113   }
13114 }
13115 
13116 
GetStructuredCommentFieldFromUserObject(UserObjectPtr uop,StructuredCommentFieldPtr field,StringConstraintPtr scp)13117 NLM_EXTERN CharPtr GetStructuredCommentFieldFromUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp)
13118 {
13119   UserFieldPtr      curr;
13120   CharPtr           rval = NULL;
13121 
13122   if (!IsUserObjectStructuredComment(uop) || field == NULL) {
13123     return NULL;
13124   }
13125 
13126   if (field->choice == StructuredCommentField_database) {
13127     for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) {
13128       if (IsUserFieldStructuredCommentPrefixOrSuffix(curr) && curr->choice == 1) {
13129         rval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue);
13130         if (!IsStringConstraintEmpty (scp) &&  !DoesStringMatchConstraint (rval, scp)) {
13131           rval = MemFree (rval);
13132         }
13133       }
13134     }
13135   } else if (field->choice == StructuredCommentField_named) {
13136     for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) {
13137       if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) {
13138         if (curr->choice == 1) {
13139           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) {
13140             rval = StringSave (curr->data.ptrvalue);
13141           }
13142         }
13143       }
13144     }
13145   } else if (field->choice == StructuredCommentField_field_name) {
13146     for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) {
13147       if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr)
13148           && DoesObjectIdMatchStringConstraint(curr->label, scp)) {
13149         rval = GetObjectIdString (curr->label);
13150       }
13151     }
13152   }
13153   return rval;
13154 }
13155 
13156 
RemoveStructuredCommentFieldFromUserObject(UserObjectPtr uop,ValNodePtr field,StringConstraintPtr scp)13157 static Boolean RemoveStructuredCommentFieldFromUserObject (UserObjectPtr uop, ValNodePtr field, StringConstraintPtr scp)
13158 {
13159   UserFieldPtr      curr, prev = NULL, ufp_next;
13160   Boolean           rval = FALSE, do_remove;
13161   CharPtr           val;
13162 
13163   if (!IsUserObjectStructuredComment(uop) || field == NULL) {
13164     return FALSE;
13165   }
13166 
13167   if (field->choice == StructuredCommentField_database) {
13168     for (curr = uop->data; curr != NULL; curr = ufp_next) {
13169       do_remove = FALSE;
13170       ufp_next = curr->next;
13171       if (IsUserFieldStructuredCommentPrefixOrSuffix (curr)
13172               && curr->choice == 1) {
13173         val = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue);
13174         if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (val, scp)) {
13175           do_remove = TRUE;
13176         }
13177         val = MemFree (val);
13178       }
13179       if (do_remove) {
13180         if (prev == NULL) {
13181           uop->data = curr->next;
13182         } else {
13183           prev->next = curr->next;
13184         }
13185         curr->next = NULL;
13186         curr = UserFieldFree (curr);
13187         rval = TRUE;
13188       } else {
13189         prev = curr;
13190       }
13191     }
13192   } else if (field->choice == StructuredCommentField_named) {
13193     for (curr = uop->data; curr != NULL; curr = ufp_next) {
13194       do_remove = FALSE;
13195       ufp_next = curr->next;
13196       if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) {
13197         if (curr->choice == 1) {
13198           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) {
13199             do_remove = TRUE;
13200           }
13201         }
13202       }
13203       if (do_remove) {
13204         if (prev == NULL) {
13205           uop->data = curr->next;
13206         } else {
13207           prev->next = curr->next;
13208         }
13209         curr->next = NULL;
13210         curr = UserFieldFree (curr);
13211         rval = TRUE;
13212       } else {
13213         prev = curr;
13214       }
13215     }
13216   } else if (field->choice == StructuredCommentField_field_name) {
13217     for (curr = uop->data; curr != NULL; curr = ufp_next) {
13218       do_remove = FALSE;
13219       ufp_next = curr->next;
13220       if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr) && DoesObjectIdMatchStringConstraint (curr->label, scp)) {
13221         if (prev == NULL) {
13222           uop->data = curr->next;
13223         } else {
13224           prev->next = curr->next;
13225         }
13226         curr->next = NULL;
13227         curr = UserFieldFree (curr);
13228         rval = TRUE;
13229       } else {
13230         prev = curr;
13231       }
13232     }
13233   }
13234   return rval;
13235 }
13236 
13237 
SetStructuredCommentFieldOnUserObject(UserObjectPtr uop,StructuredCommentFieldPtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)13238 static Boolean SetStructuredCommentFieldOnUserObject (UserObjectPtr uop, StructuredCommentFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
13239 {
13240   UserFieldPtr      curr, first = NULL, last = NULL, ufp;
13241   Boolean           rval = FALSE;
13242   CharPtr           oldval, newval, fmt;
13243   CharPtr           prefix_fmt = "##%s-START##";
13244   CharPtr           suffix_fmt = "##%s-END##";
13245 
13246   if (!IsUserObjectStructuredComment(uop) || field == NULL) {
13247     return FALSE;
13248   }
13249 
13250   if (field->choice == StructuredCommentField_database) {
13251     first = uop->data;
13252     curr = first;
13253     while (curr != NULL) {
13254       if (IsUserFieldStructuredCommentPrefixOrSuffix (curr)
13255               && curr->choice == 1) {
13256         oldval = DbnameValFromPrefixOrSuffix (curr->data.ptrvalue);
13257         if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (oldval, scp)) {
13258           if (StringCmp (curr->label->str, "StructuredCommentPrefix") == 0) {
13259             fmt = prefix_fmt;
13260           } else {
13261             fmt = suffix_fmt;
13262           }
13263           SetStringValue (&oldval, value, existing_text);
13264           newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval)));
13265           sprintf (newval, fmt, oldval);
13266           curr->data.ptrvalue = MemFree (curr->data.ptrvalue);
13267           curr->data.ptrvalue = newval;
13268           rval = TRUE;
13269         }
13270         oldval = MemFree (oldval);
13271       }
13272       last = curr;
13273       curr = curr->next;
13274     }
13275     if (!rval && IsStringConstraintEmpty (scp)) {
13276       /* make prefix */
13277       curr = UserFieldNew ();
13278       curr->label = ObjectIdNew ();
13279       curr->label->str = StringSave ("StructuredCommentPrefix");
13280       curr->choice = 1;
13281       newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (value)));
13282       sprintf (newval, prefix_fmt, value);
13283       curr->data.ptrvalue = newval;
13284       curr->next = first;
13285       uop->data = curr;
13286       first = curr;
13287 
13288       /* make suffix */
13289       curr = UserFieldNew ();
13290       curr->label = ObjectIdNew ();
13291       curr->label->str = StringSave ("StructuredCommentSuffix");
13292       curr->choice = 1;
13293       newval = (CharPtr) MemNew (sizeof (Char) * (StringLen (suffix_fmt) + StringLen (value)));
13294       sprintf (newval, suffix_fmt, value);
13295       curr->data.ptrvalue = newval;
13296       if (last == NULL) {
13297         first->next = curr;
13298       } else {
13299         last->next = curr;
13300       }
13301       rval = TRUE;
13302     }
13303   } else if (field->choice == StructuredCommentField_named) {
13304     last = uop->data;
13305     for (curr = uop->data; curr != NULL; curr = curr->next) {
13306       if (curr->label != NULL && StringICmp (curr->label->str, field->data.ptrvalue) == 0) {
13307         if (curr->choice == 1) {
13308           if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (curr->data.ptrvalue, scp)) {
13309             newval = (CharPtr) curr->data.ptrvalue;
13310             SetStringValue (&newval, value, existing_text);
13311             curr->data.ptrvalue = newval;
13312             rval = TRUE;
13313           }
13314         }
13315       }
13316       last = curr;
13317     }
13318     if (!rval && IsStringConstraintEmpty (scp)) {
13319       curr = UserFieldNew ();
13320       curr->label = ObjectIdNew ();
13321       curr->label->str = StringSave (field->data.ptrvalue);
13322       curr->choice = 1;
13323       curr->data.ptrvalue = StringSave (value);
13324       if (last == NULL) {
13325         uop->data = curr;
13326       } else {
13327         last->next = curr;
13328       }
13329       rval = TRUE;
13330     }
13331   } else if (field->choice == StructuredCommentField_field_name) {
13332     last = uop->data;
13333     for (curr = uop->data; curr != NULL; curr = curr->next) {
13334       if (!IsUserFieldStructuredCommentPrefixOrSuffix (curr)) {
13335         if (DoesObjectIdMatchStringConstraint (curr->label, scp)) {
13336           rval = SetObjectIdString (curr->label, value, existing_text);
13337         }
13338         last = curr;
13339       }
13340     }
13341     if (!rval && IsStringConstraintEmpty (scp)) {
13342       curr = UserFieldNew ();
13343       curr->label = ObjectIdNew ();
13344       curr->label->str = StringSave (value);
13345       curr->choice = 1;
13346       curr->data.ptrvalue = StringSave ("");
13347       if (last == NULL) {
13348         ufp = uop->data;
13349         if (ufp != NULL) {
13350           curr->next = ufp->next;
13351           ufp->next = curr;
13352         }
13353       } else {
13354         curr->next = last->next;
13355         last->next = curr;
13356       }
13357       rval = TRUE;
13358     }
13359   }
13360   return rval;
13361 }
13362 
13363 
13364 typedef struct dblinkname {
13365   Int4    field_type;
13366   CharPtr field_name;
13367 } DBLinkNameData, PNTR DBLinkNamePtr;
13368 
13369 static DBLinkNameData dblink_names[] = {
13370   { DBLink_field_type_trace_assembly , "Trace Assembly Archive" } ,
13371   { DBLink_field_type_bio_sample , "BioSample" } ,
13372   { DBLink_field_type_probe_db , "ProbeDB" } ,
13373   { DBLink_field_type_sequence_read_archve , "Sequence Read Archive" } ,
13374   { DBLink_field_type_bio_project , "BioProject" } ,
13375   { DBLink_field_type_assembly , "Assembly" }
13376 };
13377 
13378 #define NUM_dblinkname sizeof (dblink_names) / sizeof (DBLinkNameData)
13379 
GetDBLinkNameFromDBLinkFieldType(Int4 field_type)13380 NLM_EXTERN CharPtr GetDBLinkNameFromDBLinkFieldType (Int4 field_type)
13381 {
13382   CharPtr str = NULL;
13383   Int4 i;
13384 
13385   for (i = 0; i < NUM_dblinkname && str == NULL; i++) {
13386     if (field_type == dblink_names[i].field_type) {
13387       str = dblink_names[i].field_name;
13388     }
13389   }
13390   if (str == NULL) {
13391     str = "Unknown field type";
13392   }
13393   return str;
13394 }
13395 
13396 
GetDBLinkFieldTypeFromDBLinkName(CharPtr field_name)13397 NLM_EXTERN Int4 GetDBLinkFieldTypeFromDBLinkName (CharPtr field_name)
13398 {
13399   Int4 rval = -1;
13400   Int4 i;
13401 
13402   for (i = 0; i < NUM_dblinkname && rval < 0; i++) {
13403     if (StringCmp (field_name, dblink_names[i].field_name) == 0) {
13404       rval = dblink_names[i].field_type;
13405     }
13406   }
13407   return rval;
13408 }
13409 
13410 
GetNumDBLinkFields(void)13411 NLM_EXTERN Int4 GetNumDBLinkFields (void)
13412 {
13413   return NUM_dblinkname;
13414 }
13415 
13416 
GetDBLinkFieldFromUserObject(UserObjectPtr uop,Int4 field,StringConstraintPtr scp)13417 static CharPtr GetDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
13418 {
13419   UserFieldPtr      curr;
13420   CharPtr           rval = NULL;
13421   CharPtr           field_name;
13422   Char              buf[15];
13423   CharPtr PNTR      cpp;
13424   Int4Ptr           ipp;
13425   Int4              i;
13426 
13427   if (!IsUserObjectDBLink(uop) || field < 1) {
13428     return NULL;
13429   }
13430 
13431   field_name = GetDBLinkNameFromDBLinkFieldType (field);
13432   for (curr = uop->data; curr != NULL && rval == NULL; curr = curr->next) {
13433     if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) {
13434       if (curr->choice == 7) {
13435         if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) {
13436           for (i = 0; i < curr->num && rval == NULL; i++) {
13437             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) {
13438               rval = StringSave (cpp[i]);
13439             }
13440           }
13441         }
13442       } else if (curr->choice == 8) {
13443         if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) {
13444           for (i = 0; i < curr->num && rval == NULL; i++) {
13445             sprintf (buf, "%d", ipp[i]);
13446             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
13447               rval = StringSave (buf);
13448             }
13449           }
13450         }
13451       }
13452     }
13453   }
13454   return rval;
13455 }
13456 
13457 
GetMultipleDBLinkFieldValuesFromUserObject(UserObjectPtr uop,Int4 field,StringConstraintPtr scp)13458 static ValNodePtr GetMultipleDBLinkFieldValuesFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
13459 {
13460   UserFieldPtr      curr;
13461   ValNodePtr        rval = NULL;
13462   CharPtr           field_name;
13463   Char              buf[15];
13464   CharPtr PNTR      cpp;
13465   Int4Ptr           ipp;
13466   Int4              i;
13467 
13468   if (!IsUserObjectDBLink(uop) || field < 1) {
13469     return NULL;
13470   }
13471 
13472   field_name = GetDBLinkNameFromDBLinkFieldType (field);
13473   for (curr = uop->data; curr != NULL; curr = curr->next) {
13474     if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) {
13475       if (curr->choice == 7) {
13476         if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) {
13477           for (i = 0; i < curr->num; i++) {
13478             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) {
13479               ValNodeAddPointer (&rval, 0, StringSave (cpp[i]));
13480             }
13481           }
13482         }
13483       } else if (curr->choice == 8) {
13484         if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) {
13485           for (i = 0; i < curr->num; i++) {
13486             sprintf (buf, "%d", ipp[i]);
13487             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
13488               ValNodeAddPointer (&rval, 0, StringSave (buf));
13489             }
13490           }
13491         }
13492       }
13493     }
13494   }
13495   return rval;
13496 }
13497 
13498 
RemoveDBLinkFieldFromUserObject(UserObjectPtr uop,Int4 field,StringConstraintPtr scp)13499 static Boolean RemoveDBLinkFieldFromUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp)
13500 {
13501   UserFieldPtr      curr, prev_type = NULL, next_type;
13502   Boolean           rval = FALSE;
13503   Char              buf[15];
13504   CharPtr           field_name;
13505   CharPtr PNTR      cpp;
13506   Int4Ptr           ipp;
13507   Int4              i, j;
13508 
13509   if (!IsUserObjectDBLink(uop) || field < 1) {
13510     return FALSE;
13511   }
13512 
13513   field_name = GetDBLinkNameFromDBLinkFieldType (field);
13514   for (curr = uop->data; curr != NULL; curr = next_type) {
13515     next_type = curr->next;
13516     if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) {
13517       if (curr->choice == 7) {
13518         if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL) {
13519           for (i = 0; i < curr->num; i++) {
13520             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) {
13521               cpp[i] = MemFree (cpp[i]);
13522               for (j = i + 1; j < curr->num; j++) {
13523                 cpp[j - 1] = cpp[j];
13524               }
13525               curr->num--;
13526               rval = TRUE;
13527               i--;
13528             }
13529           }
13530         }
13531       } else if (curr->choice == 8) {
13532         if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL) {
13533           for (i = 0; i < curr->num; i++) {
13534             sprintf (buf, "%d", ipp[i]);
13535             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
13536               for (j = i + 1; j < curr->num; j++) {
13537                 ipp[j - 1] = ipp[j];
13538               }
13539               curr->num--;
13540               rval = TRUE;
13541               i--;
13542             }
13543           }
13544         }
13545       }
13546     }
13547     if (curr->num == 0) {
13548       if (prev_type == NULL) {
13549         uop->data = next_type;
13550       } else {
13551         prev_type->next = next_type;
13552       }
13553       curr->next = NULL;
13554       curr = UserFieldFree (curr);
13555     } else {
13556       prev_type = curr;
13557     }
13558   }
13559 
13560   return rval;
13561 }
13562 
13563 
SetDBLinkFieldOnUserObject(UserObjectPtr uop,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)13564 static Boolean SetDBLinkFieldOnUserObject (UserObjectPtr uop, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
13565 {
13566   UserFieldPtr      curr, last = NULL;
13567   Boolean           rval = FALSE;
13568   CharPtr           newval;
13569   CharPtr           field_name;
13570   CharPtr PNTR      cpp = NULL;
13571   CharPtr PNTR      new_cpp;
13572   Int4Ptr           ipp = NULL, new_ipp;
13573   Int4              i;
13574   Char              buf[15];
13575 
13576   if (!IsUserObjectDBLink(uop) || field < 1) {
13577     return FALSE;
13578   }
13579 
13580   field_name = GetDBLinkNameFromDBLinkFieldType (field);
13581 
13582   for (curr = uop->data; curr != NULL; curr = curr->next) {
13583     if (curr->label != NULL && StringCmp (curr->label->str, field_name) == 0) {
13584       if (curr->choice == 7) {
13585         if (curr->num > 0 && (cpp = (CharPtr PNTR) curr->data.ptrvalue) != NULL && existing_text != ExistingTextOption_add_qual) {
13586           for (i = 0; i < curr->num; i++) {
13587             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (cpp[i], scp)) {
13588               newval = cpp[i];
13589               SetStringValue (&newval, value, existing_text);
13590               cpp[i] = newval;
13591               rval = TRUE;
13592             }
13593           }
13594         }
13595         if (!rval && IsStringConstraintEmpty (scp)) {
13596           new_cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (curr->num + 1));
13597           if (cpp != NULL) {
13598             for (i = 0; i < curr->num; i++) {
13599               new_cpp[i] = cpp[i];
13600               cpp[i] = NULL;
13601             }
13602             new_cpp[i] = StringSave (value);
13603           }
13604           cpp = MemFree (cpp);
13605           curr->data.ptrvalue = new_cpp;
13606           curr->num++;
13607           rval = TRUE;
13608         }
13609       } else if (curr->choice == 8 && StringIsAllDigits (value)) {
13610         if (curr->num > 0 && (ipp = (Int4Ptr) curr->data.ptrvalue) != NULL && existing_text != ExistingTextOption_add_qual) {
13611           for (i = 0; i < curr->num; i++) {
13612             sprintf (buf, "%d", ipp[i]);
13613             if (IsStringConstraintEmpty (scp) || DoesStringMatchConstraint (buf, scp)) {
13614               newval = StringSave (buf);
13615               SetStringValue (&newval, value, existing_text);
13616               if (StringIsAllDigits (newval)) {
13617                 ipp[i] = atoi (newval);
13618                 rval = TRUE;
13619               }
13620               newval = MemFree (newval);
13621             }
13622           }
13623         }
13624         if (!rval && IsStringConstraintEmpty (scp)) {
13625           new_ipp = (Int4Ptr) MemNew (sizeof (Int4) * (curr->num + 1));
13626           if (ipp != NULL) {
13627             for (i = 0; i < curr->num; i++) {
13628               new_ipp[i] = ipp[i];
13629             }
13630             new_ipp[i] = atoi (value);
13631           }
13632           ipp = MemFree (ipp);
13633           curr->data.ptrvalue = new_ipp;
13634           curr->num++;
13635           rval = TRUE;
13636         }
13637       }
13638     }
13639     last = curr;
13640   }
13641   if (!rval && IsStringConstraintEmpty (scp) && (field != DBLink_field_type_trace_assembly || StringIsAllDigits (value))) {
13642     curr = UserFieldNew ();
13643     curr->label = ObjectIdNew ();
13644     curr->label->str = StringSave (field_name);
13645 
13646     if (field == DBLink_field_type_trace_assembly) {
13647       curr->choice = 8;
13648       curr->num = 1;
13649       ipp = (Int4Ptr) MemNew (sizeof (Int4) * curr->num);
13650       ipp[0] = atoi (value);
13651       curr->data.ptrvalue = ipp;
13652     } else {
13653       curr->choice = 7;
13654       curr->num = 1;
13655       cpp = (CharPtr PNTR) MemNew (sizeof (CharPtr) * curr->num);
13656       cpp[0] = StringSave (value);
13657       curr->data.ptrvalue = cpp;
13658     }
13659     if (last == NULL) {
13660       uop->data = curr;
13661     } else {
13662       last->next = curr;
13663     }
13664     rval = TRUE;
13665   }
13666   return rval;
13667 }
13668 
13669 
13670 
13671 
13672 
13673 
13674 
13675 /* The following functions are used for getting and setting various types of data
13676  * in publications.
13677  */
13678 
13679 
13680 static CharPtr legalMonths [] = {
13681   "Jan",
13682   "Feb",
13683   "Mar",
13684   "Apr",
13685   "May",
13686   "Jun",
13687   "Jul",
13688   "Aug",
13689   "Sep",
13690   "Oct",
13691   "Nov",
13692   "Dec",
13693   NULL
13694 };
13695 
13696 
ReadDateFromString(CharPtr date_str)13697 static DatePtr ReadDateFromString (CharPtr date_str)
13698 {
13699   Char      ch;
13700   Int2      i;
13701   CharPtr   ptr1, ptr2, month = NULL, day = NULL, year = NULL;
13702   CharPtr   str;
13703   Int4      day_val = 0;
13704   Uint1     month_num = 0;
13705   long      val;
13706   Int4      year_val = 0;
13707   DatePtr   dp = NULL;
13708   Boolean   critical_error = FALSE;
13709 
13710   if (StringHasNoText (date_str)) return NULL;
13711 
13712   str = StringSave (date_str);
13713   ptr1 = StringChr (str, '-');
13714   if (ptr1 != NULL) {
13715     *ptr1 = '\0';
13716     ptr1++;
13717     ptr2 = StringChr (ptr1, '-');
13718     if (ptr2 != NULL) {
13719       *ptr2 = '\0';
13720       ptr2++;
13721       day = str;
13722       month = ptr1;
13723       year = ptr2;
13724     } else {
13725       month = str;
13726       year = ptr1;
13727     }
13728   } else {
13729     year = str;
13730   }
13731 
13732   if (day != NULL) {
13733     if (sscanf (day, "%ld", &val) != 1 || val < 1 || val > 31) {
13734       critical_error = TRUE;
13735     }
13736     day_val = val;
13737   }
13738 
13739   if (month != NULL) {
13740     for (i = 0; legalMonths [i] != NULL; i++) {
13741       if (StringCmp (month, legalMonths [i]) == 0) {
13742         month_num = i + 1;
13743         break;
13744       }
13745     }
13746     if (legalMonths [i] == NULL) critical_error = TRUE;
13747   }
13748 
13749   if (year != NULL) {
13750     ptr1 = year;
13751     ch = *ptr1;
13752     while (ch != '\0') {
13753       if (! (IS_DIGIT (ch))) critical_error = TRUE;
13754       ptr1++;
13755       ch = *ptr1;
13756     }
13757     if (sscanf (year, "%ld", &val) == 1) {
13758       if (val < 1700 || val > 2100) critical_error = TRUE;
13759       year_val = val - 1900;
13760     }
13761     else
13762     {
13763       critical_error = TRUE;
13764     }
13765   }
13766 
13767   str = MemFree (str);
13768 
13769   if (!critical_error) {
13770     dp = DateNew();
13771     dp->data[0] = 1;
13772     dp->data[1] = (Uint1) year_val;
13773     dp->data[2] = month_num;
13774     dp->data[3] = (Uint1) day_val;
13775   }
13776   return dp;
13777 }
13778 
13779 
GetAuthorStringEx(AuthorPtr author,Boolean use_initials)13780 static CharPtr GetAuthorStringEx (AuthorPtr author, Boolean use_initials)
13781 {
13782   CharPtr str = NULL;
13783   NameStdPtr n;
13784   Int4       len;
13785   Boolean    has_middle = FALSE;
13786 
13787   if (author == NULL || author->name == NULL) return NULL;
13788 
13789   switch (author->name->choice) {
13790     case 1: /* dbtag */
13791       str = GetDbtagString (author->name->data);
13792       break;
13793     case 2: /* name */
13794       n = (NameStdPtr) author->name->data;
13795       if (n != NULL) {
13796         if (use_initials) {
13797           len = StringLen (n->names[0]) + StringLen (n->names[4]) + 2;
13798           str = (CharPtr) MemNew (sizeof (Char) * (len));
13799           sprintf (str, "%s%s", StringHasNoText (n->names[4]) ? "" : n->names[4],
13800                                 StringHasNoText (n->names[0]) ? "" : n->names[0]);
13801         } else {
13802           len = StringLen (n->names[1]) + StringLen (n->names[0]) + 2;
13803           if (StringLen (n->names[4]) > 2) {
13804             len += StringLen (n->names[4]) - 1;
13805             has_middle = TRUE;
13806           }
13807           str = (CharPtr) MemNew (sizeof (Char) * (len));
13808           sprintf (str, "%s%s%s%s%s",
13809                   StringHasNoText (n->names[1]) ? "" : n->names[1],
13810                   StringHasNoText (n->names[1]) ? "" : " ",
13811                   has_middle ? n->names[4] + 2 : "",
13812                   has_middle ? " " : "",
13813                   StringHasNoText (n->names[0]) ? "" : n->names[0]);
13814         }
13815       }
13816       break;
13817     case 3: /* ml */
13818     case 4: /* str */
13819     case 5: /* consortium */
13820       str = StringSave (author->name->data);
13821       break;
13822   }
13823   return str;
13824 }
13825 
13826 
GetAuthorString(AuthorPtr author)13827 static CharPtr GetAuthorString (AuthorPtr author)
13828 {
13829   return GetAuthorStringEx (author, FALSE);
13830 }
13831 
13832 
GetAuthorListStringEx(AuthListPtr alp,StringConstraintPtr scp,Boolean use_initials)13833 static CharPtr GetAuthorListStringEx (AuthListPtr alp, StringConstraintPtr scp, Boolean use_initials)
13834 {
13835   CharPtr str = NULL, tmp;
13836   Int4    len = 0;
13837   ValNodePtr list = NULL, vnp;
13838 
13839   if (alp == NULL) return NULL;
13840 
13841   switch (alp->choice) {
13842     case 1:
13843       for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
13844         tmp = GetAuthorStringEx (vnp->data.ptrvalue, use_initials);
13845         if (tmp != NULL) {
13846           if (DoesStringMatchConstraint (tmp, scp)) {
13847             ValNodeAddPointer (&list, 0, tmp);
13848             len += StringLen (tmp) + 2;
13849           } else {
13850             tmp = MemFree (tmp);
13851           }
13852         }
13853       }
13854       break;
13855     case 2:
13856     case 3:
13857       for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
13858         if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
13859           ValNodeAddPointer (&list, 0, StringSave (vnp->data.ptrvalue));
13860           len += StringLen (vnp->data.ptrvalue) + 2;
13861         }
13862       }
13863       break;
13864   }
13865 
13866   if (len > 0) {
13867     str = (CharPtr) MemNew (sizeof (Char) * (len + 1));
13868     for (vnp = list; vnp != NULL; vnp = vnp->next) {
13869       StringCat (str, vnp->data.ptrvalue);
13870       if (vnp->next != NULL) {
13871         StringCat (str, ", ");
13872       }
13873     }
13874   }
13875   return str;
13876 }
13877 
13878 
GetAuthorListString(AuthListPtr alp,StringConstraintPtr scp)13879 NLM_EXTERN CharPtr GetAuthorListString (AuthListPtr alp, StringConstraintPtr scp)
13880 {
13881   return GetAuthorListStringEx (alp, scp, FALSE);
13882 }
13883 
13884 
RemoveAuthorListString(AuthListPtr alp,StringConstraintPtr scp)13885 static Boolean RemoveAuthorListString (AuthListPtr alp, StringConstraintPtr scp)
13886 {
13887   CharPtr tmp;
13888   Boolean rval = FALSE;
13889   ValNodePtr vnp, vnp_next, vnp_prev = NULL;
13890 
13891   if (alp == NULL) return FALSE;
13892 
13893   switch (alp->choice) {
13894     case 1:
13895       for (vnp = alp->names; vnp != NULL; vnp = vnp_next) {
13896         vnp_next = vnp->next;
13897         tmp = GetAuthorString (vnp->data.ptrvalue);
13898         if (tmp != NULL) {
13899           if (DoesStringMatchConstraint (tmp, scp)) {
13900             if (vnp_prev == NULL) {
13901               alp->names = vnp->next;
13902             } else {
13903               vnp_prev->next = vnp->next;
13904             }
13905             vnp->next = NULL;
13906             vnp->data.ptrvalue = AuthorFree (vnp->data.ptrvalue);
13907             vnp = ValNodeFree (vnp);
13908             rval = TRUE;
13909           } else {
13910             vnp_prev = vnp;
13911           }
13912           tmp = MemFree (tmp);
13913         } else {
13914           vnp_prev = vnp;
13915         }
13916       }
13917       break;
13918     case 2:
13919     case 3:
13920       for (vnp = alp->names; vnp != NULL; vnp = vnp_next) {
13921         vnp_next = vnp->next;
13922         if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
13923           if (vnp_prev == NULL) {
13924             alp->names = vnp->next;
13925           } else {
13926             vnp_prev->next = vnp->next;
13927           }
13928           vnp->next = NULL;
13929           vnp = ValNodeFreeData (vnp);
13930           rval = TRUE;
13931         } else {
13932           vnp_prev = vnp;
13933         }
13934       }
13935       break;
13936   }
13937 
13938   return rval;
13939 }
13940 
13941 
ReadNameFromString(CharPtr str,CharPtr PNTR next_name)13942 static NameStdPtr ReadNameFromString (CharPtr str, CharPtr PNTR next_name)
13943 {
13944   CharPtr cp_end, cp_space;
13945   CharPtr p_repl1 = NULL, p_repl2 = NULL, p_repl3 = NULL;
13946   Char    ch_r1, ch_r2, ch_r3;
13947   NameStdPtr n;
13948 
13949   if (StringHasNoText (str))
13950   {
13951     if (next_name != NULL)
13952     {
13953       *next_name = NULL;
13954     }
13955     return NULL;
13956   }
13957 
13958   /* skip over any leading spaces */
13959   str += StringSpn (str, " \t");
13960 
13961   /* skip over "and" if found */
13962   if (StringNCmp (str, "and ", 4) == 0)
13963   {
13964     str += 4;
13965   }
13966   if (StringHasNoText (str)) {
13967     str = MemFree (str);
13968     return NULL;
13969   }
13970 
13971   cp_end = StringChr (str, ',');
13972   if (cp_end != NULL)
13973   {
13974     p_repl1 = cp_end;
13975     ch_r1 = *p_repl1;
13976     *cp_end = 0;
13977     if (next_name != NULL)
13978     {
13979       if (StringHasNoText (cp_end + 1))
13980       {
13981         *next_name = NULL;
13982       }
13983       else
13984       {
13985         *next_name = cp_end + 1;
13986       }
13987     }
13988   }
13989   else if (next_name != NULL)
13990   {
13991     *next_name = NULL;
13992   }
13993 
13994   n = NameStdNew ();
13995   /* look for elements in name */
13996   cp_space = StringRChr (str, ' ');
13997   if (cp_space == NULL)
13998   {
13999     n->names[0] = StringSave (str);
14000   }
14001   else
14002   {
14003     n->names[0] = StringSave (cp_space + 1);
14004     while (isspace (*cp_space))
14005     {
14006       cp_space--;
14007     }
14008     p_repl2 = cp_space + 1;
14009     ch_r2 = *p_repl2;
14010     *(cp_space + 1) = 0;
14011     cp_space = StringChr (str, ' ');
14012     if (cp_space == NULL)
14013     {
14014        n->names[1] = StringSave (str);
14015        n->names[4] = (CharPtr) MemNew (sizeof (Char) * 3);
14016        sprintf (n->names[4], "%c.", *(n->names[1]));
14017     }
14018     else
14019     {
14020       p_repl3 = cp_space;
14021       ch_r3 = *p_repl3;
14022       *(cp_space) = 0;
14023       n->names[1] = StringSave (str);
14024 
14025       cp_space++;
14026       while (isspace (*cp_space))
14027       {
14028         cp_space++;
14029       }
14030 
14031       n->names[4] = (CharPtr) MemNew (sizeof (Char) * (4 + StringLen (cp_space)));
14032       sprintf (n->names[4], "%c.%s.", *(n->names[1]), cp_space);
14033     }
14034   }
14035 
14036   if (p_repl1 != NULL) {
14037     *p_repl1 = ch_r1;
14038   }
14039   if (p_repl2 != NULL) {
14040     *p_repl2 = ch_r2;
14041   }
14042   if (p_repl3 != NULL) {
14043     *p_repl3 = ch_r3;
14044   }
14045 
14046   return n;
14047 }
14048 
14049 
ReadNameListFromString(CharPtr value)14050 NLM_EXTERN ValNodePtr ReadNameListFromString (CharPtr value)
14051 {
14052   ValNodePtr names = NULL;
14053   AuthorPtr  ap;
14054   NameStdPtr n;
14055   CharPtr    next_cp, cp;
14056 
14057   cp = value;
14058   next_cp = NULL;
14059   while (cp != NULL) {
14060     n = ReadNameFromString (cp, &next_cp);
14061     if (n != NULL) {
14062       ap = AuthorNew ();
14063       ap->name = PersonIdNew ();
14064       ap->name->choice = 2;
14065       ap->name->data = n;
14066       ValNodeAddPointer (&names, 1, ap);
14067     }
14068     cp = next_cp;
14069   }
14070   return names;
14071 }
14072 
14073 
/* FreeNameList
 *
 * Releases every node of name_list together with the data it points to.
 *
 * choice:    1 means each node holds an Author, released with AuthorFree;
 *            any other value (ml or str lists) means the data is released
 *            with MemFree.
 * name_list: head of the list to release; may be NULL.
 *
 * Always returns NULL, so the result can be assigned back to the caller's
 * list pointer.
 */
static ValNodePtr FreeNameList (Uint1 choice, ValNodePtr name_list)
{
  ValNodePtr node, following;

  for (node = name_list; node != NULL; node = following) {
    /* remember the successor before the node itself is released */
    following = node->next;
    if (choice == 1) {
      AuthorFree ((AuthorPtr) node->data.ptrvalue);
    } else {
      MemFree (node->data.ptrvalue);
    }
    MemFree (node);
  }
  return NULL;
}
14091 
14092 
14093 
14094 
SetAuthorListFromString(AuthListPtr alp,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14095 static Boolean SetAuthorListFromString (AuthListPtr alp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14096 {
14097   ValNodePtr name_list = NULL, vnp, vnp_prev, vnp_next, vnp_tmp;
14098   CharPtr    tmp;
14099   Boolean    rval = FALSE, found, ok_to_set = FALSE;
14100 
14101   if (alp == NULL || StringHasNoText (value)) return FALSE;
14102 
14103   /* can only combine lists if existing list is same type */
14104   if (alp->names == NULL || alp->choice == 1) {
14105     ok_to_set = TRUE;
14106   } else {
14107     switch (existing_text) {
14108       case ExistingTextOption_replace_old:
14109         if (IsStringConstraintEmpty (scp)) {
14110           ok_to_set = TRUE;
14111         }
14112         break;
14113       case ExistingTextOption_append_space:
14114       case ExistingTextOption_append_colon:
14115       case ExistingTextOption_append_none:
14116       case ExistingTextOption_prefix_space:
14117       case ExistingTextOption_prefix_colon:
14118       case ExistingTextOption_prefix_none:
14119         ok_to_set = TRUE;
14120         break;
14121     }
14122   }
14123   if (!ok_to_set) {
14124     return FALSE;
14125   }
14126 
14127   if (alp->names == NULL && IsStringConstraintEmpty (scp)) {
14128     /* no prior values - just add new list */
14129     name_list = ReadNameListFromString (value);
14130     if (name_list != NULL) {
14131       ValNodeLink (&alp->names, name_list);
14132       alp->choice = 1;
14133       rval = TRUE;
14134     }
14135   } else {
14136     switch (existing_text) {
14137       case ExistingTextOption_append_semi:
14138       case ExistingTextOption_append_comma:
14139         name_list = ReadNameListFromString (value);
14140         if (IsStringConstraintEmpty (scp)) {
14141           /* append to list */
14142           ValNodeLink (&(alp->names), name_list);
14143           rval = TRUE;
14144         } else {
14145           /* insert in list after first match */
14146           vnp = alp->names;
14147           found = FALSE;
14148           while (vnp != NULL && !found) {
14149             tmp = GetAuthorString (vnp->data.ptrvalue);
14150             if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) {
14151               found = TRUE;
14152             }
14153             tmp = MemFree (tmp);
14154             if (!found) {
14155               vnp = vnp->next;
14156             }
14157           }
14158           if (found) {
14159             ValNodeLink (&name_list, vnp->next);
14160             vnp->next = name_list;
14161             rval = TRUE;
14162           }
14163         }
14164         break;
14165       case ExistingTextOption_prefix_semi:
14166       case ExistingTextOption_prefix_comma:
14167         name_list = ReadNameListFromString (value);
14168         if (IsStringConstraintEmpty (scp)) {
14169           /* prepend to list */
14170           ValNodeLink (&name_list, alp->names);
14171           alp->names = name_list;
14172           rval = TRUE;
14173         } else {
14174           /* insert in list before first match */
14175           vnp = alp->names;
14176           vnp_prev = NULL;
14177           found = FALSE;
14178           while (vnp != NULL && !found) {
14179             tmp = GetAuthorString (vnp->data.ptrvalue);
14180             if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) {
14181               found = TRUE;
14182             }
14183             tmp = MemFree (tmp);
14184             if (!found) {
14185               vnp_prev = vnp;
14186               vnp = vnp->next;
14187             }
14188           }
14189           if (found) {
14190             if (vnp_prev == NULL) {
14191               ValNodeLink (&name_list, alp->names);
14192               alp->names = name_list;
14193             } else {
14194               ValNodeLink (&name_list, vnp_prev->next);
14195               vnp_prev->next = name_list;
14196             }
14197             rval = TRUE;
14198           }
14199         }
14200         break;
14201       case ExistingTextOption_replace_old:
14202         name_list = ReadNameListFromString (value);
14203         if (IsStringConstraintEmpty (scp)) {
14204           /* replace entire list */
14205           alp->names = FreeNameList (alp->choice, alp->names);
14206           alp->names = name_list;
14207           alp->choice = 1;
14208           rval = TRUE;
14209         } else {
14210           /* replace first author that matches with new match, remove others that match */
14211           vnp = alp->names;
14212           vnp_prev = NULL;
14213           found = FALSE;
14214           while (vnp != NULL) {
14215             vnp_next = vnp->next;
14216             tmp = GetAuthorString (vnp->data.ptrvalue);
14217             if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)) {
14218               if (found) {
14219                 if (vnp_prev == NULL) {
14220                   alp->names = vnp->next;
14221                 } else {
14222                   vnp_prev->next = vnp->next;
14223                 }
14224               } else {
14225                 vnp_tmp = name_list;
14226                 while (vnp_tmp->next != NULL) {
14227                   vnp_tmp = vnp_tmp->next;
14228                 }
14229                 ValNodeLink (&name_list, vnp->next);
14230                 if (vnp_prev == NULL) {
14231                   alp->names = name_list;
14232                 } else {
14233                   vnp_prev->next = name_list;
14234                 }
14235                 vnp_prev = vnp_tmp;
14236                 found = TRUE;
14237                 rval = TRUE;
14238               }
14239               vnp->next = NULL;
14240               vnp = FreeNameList (alp->choice, vnp);
14241             } else {
14242               vnp_prev = vnp;
14243             }
14244             tmp = MemFree (tmp);
14245             vnp = vnp_next;
14246           }
14247         }
14248         break;
14249       case ExistingTextOption_append_space:
14250       case ExistingTextOption_append_colon:
14251       case ExistingTextOption_append_none:
14252       case ExistingTextOption_prefix_space:
14253       case ExistingTextOption_prefix_colon:
14254       case ExistingTextOption_prefix_none:
14255         vnp_prev = NULL;
14256         for (vnp = alp->names; vnp != NULL; vnp = vnp_next) {
14257           vnp_next = vnp->next;
14258           if (alp->choice == 1) {
14259             tmp = GetAuthorString (vnp->data.ptrvalue);
14260             if (tmp != NULL && DoesStringMatchConstraint (tmp, scp)
14261                 && SetStringValue (&tmp, value, existing_text)) {
14262               name_list = ReadNameListFromString (tmp);
14263               if (name_list != NULL) {
14264                 vnp_tmp = name_list;
14265                 while (vnp_tmp->next != NULL) {
14266                   vnp_tmp = vnp_tmp->next;
14267                 }
14268                 ValNodeLink (&name_list, vnp_next);
14269                 if (vnp_prev == NULL) {
14270                   alp->names = name_list;
14271                 } else {
14272                   vnp_prev->next = name_list;
14273                 }
14274                 vnp_prev = vnp_tmp;
14275                 vnp->next = NULL;
14276                 vnp = FreeNameList (alp->choice, vnp);
14277                 rval = TRUE;
14278                 name_list = NULL;
14279               } else {
14280                 vnp_prev = vnp;
14281               }
14282             } else {
14283               vnp_prev = vnp;
14284             }
14285             tmp = MemFree (tmp);
14286           } else {
14287             if (vnp->data.ptrvalue != NULL && DoesStringMatchConstraint (vnp->data.ptrvalue, scp)) {
14288               tmp = (CharPtr) vnp->data.ptrvalue;
14289               rval |= SetStringValue (&tmp, value, existing_text);
14290               vnp->data.ptrvalue = tmp;
14291             }
14292           }
14293         }
14294         break;
14295     }
14296   }
14297   if (!rval && name_list != NULL) {
14298     name_list = FreeNameList (1, vnp);
14299   }
14300   return rval;
14301 }
14302 
14303 
GetPubFieldFromAffil(AffilPtr ap,Int4 field,StringConstraintPtr scp)14304 static CharPtr GetPubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp)
14305 {
14306   CharPtr str = NULL;
14307 
14308   if (ap == NULL) return NULL;
14309 
14310   switch (field) {
14311     case Publication_field_affiliation:
14312       if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) {
14313         str = StringSave (ap->affil);
14314       }
14315       break;
14316     case Publication_field_affil_div:
14317       if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) {
14318         str = StringSave (ap->div);
14319       }
14320       break;
14321     case Publication_field_affil_city:
14322       if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) {
14323         str = StringSave (ap->city);
14324       }
14325       break;
14326     case Publication_field_affil_sub:
14327       if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) {
14328         str = StringSave (ap->sub);
14329       }
14330       break;
14331     case Publication_field_affil_country:
14332       if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) {
14333         str = StringSave (ap->country);
14334       }
14335       break;
14336     case Publication_field_affil_street:
14337       if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) {
14338         str = StringSave (ap->street);
14339       }
14340       break;
14341     case Publication_field_affil_email:
14342       if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) {
14343         str = StringSave (ap->email);
14344       }
14345       break;
14346     case Publication_field_affil_fax:
14347       if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) {
14348         str = StringSave (ap->fax);
14349       }
14350       break;
14351     case Publication_field_affil_phone:
14352       if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) {
14353         str = StringSave (ap->phone);
14354       }
14355       break;
14356     case Publication_field_affil_zipcode:
14357       if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) {
14358         str = StringSave (ap->postal_code);
14359       }
14360       break;
14361   }
14362   return str;
14363 }
14364 
14365 
RemovePubFieldFromAffil(AffilPtr ap,Int4 field,StringConstraintPtr scp)14366 static Boolean RemovePubFieldFromAffil (AffilPtr ap, Int4 field, StringConstraintPtr scp)
14367 {
14368   Boolean rval = FALSE;
14369   if (ap == NULL) return FALSE;
14370 
14371   switch (field) {
14372     case Publication_field_affiliation:
14373       if (!StringHasNoText (ap->affil) && DoesStringMatchConstraint (ap->affil, scp)) {
14374         ap->affil = MemFree (ap->affil);
14375         rval = TRUE;
14376       }
14377       break;
14378     case Publication_field_affil_div:
14379       if (!StringHasNoText (ap->div) && DoesStringMatchConstraint (ap->div, scp)) {
14380         ap->div = MemFree (ap->div);
14381         rval = TRUE;
14382       }
14383       break;
14384     case Publication_field_affil_city:
14385       if (!StringHasNoText (ap->city) && DoesStringMatchConstraint (ap->city, scp)) {
14386         ap->city = MemFree (ap->city);
14387         rval = TRUE;
14388       }
14389       break;
14390     case Publication_field_affil_sub:
14391       if (!StringHasNoText (ap->sub) && DoesStringMatchConstraint (ap->sub, scp)) {
14392         ap->sub = MemFree (ap->sub);
14393         rval = TRUE;
14394       }
14395       break;
14396     case Publication_field_affil_country:
14397       if (!StringHasNoText (ap->country) && DoesStringMatchConstraint (ap->country, scp)) {
14398         ap->country = MemFree (ap->country);
14399         rval = TRUE;
14400       }
14401       break;
14402     case Publication_field_affil_street:
14403       if (!StringHasNoText (ap->street) && DoesStringMatchConstraint (ap->street, scp)) {
14404         ap->street = MemFree (ap->street);
14405         rval = TRUE;
14406       }
14407       break;
14408     case Publication_field_affil_email:
14409       if (!StringHasNoText (ap->email) && DoesStringMatchConstraint (ap->email, scp)) {
14410         ap->email = MemFree (ap->email);
14411         rval = TRUE;
14412       }
14413       break;
14414     case Publication_field_affil_fax:
14415       if (!StringHasNoText (ap->fax) && DoesStringMatchConstraint (ap->fax, scp)) {
14416         ap->fax = MemFree (ap->fax);
14417         rval = TRUE;
14418       }
14419       break;
14420     case Publication_field_affil_phone:
14421       if (!StringHasNoText (ap->phone) && DoesStringMatchConstraint (ap->phone, scp)) {
14422         ap->phone = MemFree (ap->phone);
14423         rval = TRUE;
14424       }
14425       break;
14426     case Publication_field_affil_zipcode:
14427       if (!StringHasNoText (ap->postal_code) && DoesStringMatchConstraint (ap->postal_code, scp)) {
14428         ap->postal_code = MemFree (ap->postal_code);
14429         rval = TRUE;
14430       }
14431       break;
14432   }
14433   return rval;
14434 }
14435 
14436 
SetAffilPubField(AffilPtr ap,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14437 static Boolean SetAffilPubField (AffilPtr ap, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14438 {
14439   Boolean rval = FALSE;
14440   if (ap == NULL) return FALSE;
14441 
14442   switch (field) {
14443     case Publication_field_affiliation:
14444       if (!StringHasNoText (ap->affil) || DoesStringMatchConstraint (ap->affil, scp)) {
14445         rval = SetStringValue (&(ap->affil), value, existing_text);
14446       }
14447       break;
14448     case Publication_field_affil_div:
14449       if (!StringHasNoText (ap->div) || DoesStringMatchConstraint (ap->div, scp)) {
14450         rval = SetStringValue (&(ap->div), value, existing_text);
14451       }
14452       break;
14453     case Publication_field_affil_city:
14454       if (!StringHasNoText (ap->city) || DoesStringMatchConstraint (ap->city, scp)) {
14455         rval = SetStringValue (&(ap->city), value, existing_text);
14456       }
14457       break;
14458     case Publication_field_affil_sub:
14459       if (!StringHasNoText (ap->sub) || DoesStringMatchConstraint (ap->sub, scp)) {
14460         rval = SetStringValue (&(ap->sub), value, existing_text);
14461       }
14462       break;
14463     case Publication_field_affil_country:
14464       if (!StringHasNoText (ap->country) || DoesStringMatchConstraint (ap->country, scp)) {
14465         rval = SetStringValue (&(ap->country), value, existing_text);
14466       }
14467       break;
14468     case Publication_field_affil_street:
14469       if (!StringHasNoText (ap->street) || DoesStringMatchConstraint (ap->street, scp)) {
14470         rval = SetStringValue (&(ap->street), value, existing_text);
14471       }
14472       break;
14473     case Publication_field_affil_email:
14474       if (!StringHasNoText (ap->email) || DoesStringMatchConstraint (ap->email, scp)) {
14475         rval = SetStringValue (&(ap->email), value, existing_text);
14476       }
14477       break;
14478     case Publication_field_affil_fax:
14479       if (!StringHasNoText (ap->fax) || DoesStringMatchConstraint (ap->fax, scp)) {
14480         rval = SetStringValue (&(ap->fax), value, existing_text);
14481       }
14482       break;
14483     case Publication_field_affil_phone:
14484       if (!StringHasNoText (ap->phone) || DoesStringMatchConstraint (ap->phone, scp)) {
14485         rval = SetStringValue (&(ap->phone), value, existing_text);
14486       }
14487       break;
14488     case Publication_field_affil_zipcode:
14489       if (!StringHasNoText (ap->postal_code) || DoesStringMatchConstraint (ap->postal_code, scp)) {
14490         rval = SetStringValue (&(ap->postal_code), value, existing_text);
14491       }
14492       break;
14493   }
14494   return rval;
14495 }
14496 
14497 
GetPubFieldFromImprint(ImprintPtr imprint,Int4 field,StringConstraintPtr scp)14498 static CharPtr GetPubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp)
14499 {
14500   CharPtr str = NULL;
14501   if (imprint == NULL) return NULL;
14502 
14503   switch (field) {
14504     case Publication_field_volume:
14505       if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) {
14506         str = StringSave (imprint->volume);
14507       }
14508       break;
14509     case Publication_field_issue:
14510       if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) {
14511         str = StringSave (imprint->issue);
14512       }
14513       break;
14514     case Publication_field_pages:
14515       if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) {
14516         str = StringSave (imprint->pages);
14517       }
14518       break;
14519     case Publication_field_date:
14520       if (imprint->date != NULL) {
14521         str = PrintPartialOrCompleteDate (imprint->date);
14522         if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
14523           str = MemFree (str);
14524         }
14525       }
14526       break;
14527   }
14528   return str;
14529 }
14530 
14531 
RemovePubDate(DatePtr PNTR pDate,StringConstraintPtr scp)14532 static Boolean RemovePubDate (DatePtr PNTR pDate, StringConstraintPtr scp)
14533 {
14534   CharPtr str;
14535   Boolean rval = FALSE;
14536 
14537   if (pDate == NULL || *pDate == NULL) {
14538     return FALSE;
14539   }
14540 
14541   str = PrintPartialOrCompleteDate (*pDate);
14542   if (!StringHasNoText (str) && DoesStringMatchConstraint (str, scp)) {
14543     *pDate = DateFree (*pDate);
14544     rval = TRUE;
14545   }
14546   str = MemFree (str);
14547   return rval;
14548 }
14549 
14550 
SetPubDate(DatePtr PNTR pDate,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14551 static Boolean SetPubDate (DatePtr PNTR pDate, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14552 {
14553   CharPtr tmp;
14554   DatePtr dp = NULL;
14555   Boolean made_new_date = FALSE;
14556   Boolean rval = FALSE;
14557 
14558   if (pDate == NULL) {
14559     return FALSE;
14560   }
14561   if (*pDate == NULL) {
14562     *pDate = DateNew();
14563     made_new_date = TRUE;
14564   }
14565   tmp = PrintPartialOrCompleteDate (*pDate);
14566   if (DoesStringMatchConstraint (tmp, scp)
14567       && SetStringValue (&tmp, value, existing_text)) {
14568     dp = ReadDateFromString (tmp);
14569     if (dp != NULL) {
14570       *pDate = DateFree (*pDate);
14571       *pDate = dp;
14572       rval = TRUE;
14573     }
14574   }
14575   tmp = MemFree (tmp);
14576   if (!rval && made_new_date) {
14577     *pDate = DateFree (*pDate);
14578   }
14579   return rval;
14580 }
14581 
14582 
RemovePubFieldFromImprint(ImprintPtr imprint,Int4 field,StringConstraintPtr scp)14583 static Boolean RemovePubFieldFromImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp)
14584 {
14585   Boolean rval = FALSE;
14586   if (imprint == NULL) return FALSE;
14587 
14588   switch (field) {
14589     case Publication_field_volume:
14590       if (!StringHasNoText (imprint->volume) && DoesStringMatchConstraint (imprint->volume, scp)) {
14591         imprint->volume = MemFree (imprint->volume);
14592         rval = TRUE;
14593       }
14594       break;
14595     case Publication_field_issue:
14596       if (!StringHasNoText (imprint->issue) && DoesStringMatchConstraint (imprint->issue, scp)) {
14597         imprint->issue = MemFree (imprint->issue);
14598         rval = TRUE;
14599       }
14600       break;
14601     case Publication_field_pages:
14602       if (!StringHasNoText (imprint->pages) && DoesStringMatchConstraint (imprint->pages, scp)) {
14603         imprint->pages = MemFree (imprint->pages);
14604         rval = TRUE;
14605       }
14606       break;
14607     case Publication_field_date:
14608       rval = RemovePubDate (&(imprint->date), scp);
14609       break;
14610   }
14611   return rval;
14612 }
14613 
14614 
SetPubFieldOnImprint(ImprintPtr imprint,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14615 static Boolean SetPubFieldOnImprint (ImprintPtr imprint, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14616 {
14617   Boolean rval = FALSE;
14618 
14619   if (imprint == NULL) return FALSE;
14620 
14621   switch (field) {
14622     case Publication_field_volume:
14623       if (DoesStringMatchConstraint (imprint->volume, scp)) {
14624         rval = SetStringValue (&(imprint->volume), value, existing_text);
14625       }
14626       break;
14627     case Publication_field_issue:
14628       if (StringHasNoText (imprint->issue) || DoesStringMatchConstraint (imprint->issue, scp)) {
14629         rval = SetStringValue (&(imprint->issue), value, existing_text);
14630       }
14631       break;
14632     case Publication_field_pages:
14633       if (StringHasNoText (imprint->pages) || DoesStringMatchConstraint (imprint->pages, scp)) {
14634         rval = SetStringValue (&(imprint->pages), value, existing_text);
14635       }
14636       break;
14637     case Publication_field_date:
14638       rval = SetPubDate (&(imprint->date), scp, value, existing_text);
14639       break;
14640   }
14641   return rval;
14642 }
14643 
14644 
/* SetValNodeChoices
 *
 * Stores new_choice in the choice member of every node of list.
 * A NULL list is accepted and leaves nothing to do.
 */
static void SetValNodeChoices (ValNodePtr list, Uint1 new_choice)
{
  ValNodePtr node;

  for (node = list; node != NULL; node = node->next) {
    node->choice = new_choice;
  }
}
14652 
14653 
GetPubFieldFromCitJour(CitJourPtr cjp,Int4 field,StringConstraintPtr scp)14654 static CharPtr GetPubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp)
14655 {
14656   CharPtr str = NULL;
14657   if (cjp == NULL) return NULL;
14658 
14659   switch (field) {
14660     case Publication_field_journal:
14661       str = GetFirstValNodeStringMatch (cjp->title, scp);
14662       break;
14663     case Publication_field_volume:
14664     case Publication_field_issue:
14665     case Publication_field_pages:
14666     case Publication_field_date:
14667       str = GetPubFieldFromImprint (cjp->imp, field, scp);
14668       break;
14669   }
14670 
14671   return str;
14672 }
14673 
14674 
RemovePubFieldFromCitJour(CitJourPtr cjp,Int4 field,StringConstraintPtr scp)14675 static Boolean RemovePubFieldFromCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp)
14676 {
14677   Boolean rval = FALSE;
14678   if (cjp == NULL) return FALSE;
14679 
14680   switch (field) {
14681     case Publication_field_journal:
14682       rval = RemoveValNodeStringMatch (&(cjp->title), scp);
14683       break;
14684     case Publication_field_volume:
14685     case Publication_field_issue:
14686     case Publication_field_pages:
14687     case Publication_field_date:
14688       rval = RemovePubFieldFromImprint (cjp->imp, field, scp);
14689       break;
14690   }
14691 
14692   return rval;
14693 }
14694 
14695 
SetPubFieldOnCitJour(CitJourPtr cjp,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14696 static Boolean SetPubFieldOnCitJour (CitJourPtr cjp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14697 {
14698   Boolean rval = FALSE;
14699   if (cjp == NULL) return FALSE;
14700 
14701   switch (field) {
14702     case Publication_field_journal:
14703       rval = SetStringsInValNodeStringList (&(cjp->title), scp, value, existing_text);
14704       SetValNodeChoices (cjp->title, 1);
14705       break;
14706     case Publication_field_volume:
14707     case Publication_field_issue:
14708     case Publication_field_pages:
14709     case Publication_field_date:
14710       rval = SetPubFieldOnImprint (cjp->imp, field, scp, value, existing_text);
14711       break;
14712   }
14713 
14714   return rval;
14715 }
14716 
14717 
GetPubFieldFromCitBook(CitBookPtr cbp,Int4 field,StringConstraintPtr scp)14718 static CharPtr GetPubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp)
14719 {
14720   CharPtr str = NULL;
14721 
14722   if (cbp == NULL) return NULL;
14723 
14724   switch (field) {
14725     case Publication_field_title:
14726       str = GetFirstValNodeStringMatch (cbp->title, scp);
14727       break;
14728     case Publication_field_authors:
14729       str = GetAuthorListString (cbp->authors, scp);
14730       break;
14731     case Publication_field_authors_initials:
14732       str = GetAuthorListStringEx (cbp->authors, scp, TRUE);
14733       break;
14734     case Publication_field_affiliation:
14735     case Publication_field_affil_div:
14736     case Publication_field_affil_city:
14737     case Publication_field_affil_sub:
14738     case Publication_field_affil_country:
14739     case Publication_field_affil_street:
14740     case Publication_field_affil_email:
14741     case Publication_field_affil_fax:
14742     case Publication_field_affil_phone:
14743     case Publication_field_affil_zipcode:
14744       if (cbp->authors != NULL) {
14745         str = GetPubFieldFromAffil (cbp->authors->affil, field, scp);
14746       }
14747       break;
14748     case Publication_field_volume:
14749     case Publication_field_issue:
14750     case Publication_field_pages:
14751     case Publication_field_date:
14752       str = GetPubFieldFromImprint (cbp->imp, field, scp);
14753       break;
14754   }
14755 
14756   return str;
14757 }
14758 
14759 
RemovePubFieldFromCitBook(CitBookPtr cbp,Int4 field,StringConstraintPtr scp)14760 static Boolean RemovePubFieldFromCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp)
14761 {
14762   Boolean rval = FALSE;
14763 
14764   if (cbp == NULL) return FALSE;
14765 
14766   switch (field) {
14767     case Publication_field_title:
14768       rval = RemoveValNodeStringMatch (&(cbp->title), scp);
14769       break;
14770     case Publication_field_authors:
14771       rval = RemoveAuthorListString (cbp->authors, scp);
14772       break;
14773     case Publication_field_affiliation:
14774     case Publication_field_affil_div:
14775     case Publication_field_affil_city:
14776     case Publication_field_affil_sub:
14777     case Publication_field_affil_country:
14778     case Publication_field_affil_street:
14779     case Publication_field_affil_email:
14780     case Publication_field_affil_fax:
14781     case Publication_field_affil_phone:
14782     case Publication_field_affil_zipcode:
14783       if (cbp->authors != NULL) {
14784         rval = RemovePubFieldFromAffil(cbp->authors->affil, field, scp);
14785       }
14786       break;
14787     case Publication_field_volume:
14788     case Publication_field_issue:
14789     case Publication_field_pages:
14790     case Publication_field_date:
14791       rval = RemovePubFieldFromImprint (cbp->imp, field, scp);
14792       break;
14793   }
14794 
14795   return rval;
14796 }
14797 
14798 
SetPubFieldOnCitBook(CitBookPtr cbp,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)14799 static Boolean SetPubFieldOnCitBook (CitBookPtr cbp, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
14800 {
14801   Boolean rval = FALSE;
14802 
14803   if (cbp == NULL) return FALSE;
14804 
14805   switch (field) {
14806     case Publication_field_title:
14807       rval = SetStringsInValNodeStringList (&(cbp->title), scp, value, existing_text);
14808       SetValNodeChoices (cbp->title, 1);
14809       break;
14810     case Publication_field_authors:
14811       if (cbp->authors == NULL) {
14812         cbp->authors = AuthListNew();
14813       }
14814       rval = SetAuthorListFromString (cbp->authors, scp, value, existing_text);
14815       break;
14816     case Publication_field_affiliation:
14817     case Publication_field_affil_div:
14818     case Publication_field_affil_city:
14819     case Publication_field_affil_sub:
14820     case Publication_field_affil_country:
14821     case Publication_field_affil_street:
14822     case Publication_field_affil_email:
14823     case Publication_field_affil_fax:
14824     case Publication_field_affil_phone:
14825     case Publication_field_affil_zipcode:
14826       if (cbp->authors == NULL) {
14827         cbp->authors = AuthListNew();
14828       }
14829       if (cbp->authors->affil == NULL) {
14830         cbp->authors->affil = AffilNew();
14831       }
14832       rval = SetAffilPubField (cbp->authors->affil, field, scp, value, existing_text);
14833       break;
14834     case Publication_field_volume:
14835     case Publication_field_issue:
14836     case Publication_field_pages:
14837     case Publication_field_date:
14838       if (cbp->imp == NULL) {
14839         cbp->imp = ImprintNew();
14840       }
14841       rval = SetPubFieldOnImprint (cbp->imp, field, scp, value, existing_text);
14842       break;
14843   }
14844 
14845   return rval;
14846 }
14847 
14848 
GetPubFieldFromPub(PubPtr the_pub,Int4 field,StringConstraintPtr scp)14849 NLM_EXTERN CharPtr GetPubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp)
14850 {
14851   CitGenPtr    cgp;
14852   CitArtPtr    cap;
14853   CitBookPtr   cbp;
14854   CitPatPtr    cpp;
14855   CitSubPtr    csp;
14856   CitJourPtr   cjp;
14857   CharPtr      str = NULL;
14858 
14859   if (the_pub == NULL || the_pub->data.ptrvalue == NULL) return NULL;
14860 
14861   if (field == Publication_field_pub_class) {
14862     return GetPubclassFromPub(the_pub);
14863   }
14864 
14865   switch (the_pub->choice) {
14866     case PUB_Gen :
14867       cgp = (CitGenPtr) the_pub->data.ptrvalue;
14868       switch (field) {
14869         case Publication_field_cit:
14870           if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) {
14871             str = StringSave (cgp->cit);
14872           }
14873           break;
14874         case Publication_field_authors:
14875           str = GetAuthorListString (cgp->authors, scp);
14876           break;
14877         case Publication_field_authors_initials:
14878           str = GetAuthorListStringEx (cgp->authors, scp, TRUE);
14879           break;
14880         case Publication_field_affiliation:
14881         case Publication_field_affil_div:
14882         case Publication_field_affil_city:
14883         case Publication_field_affil_sub:
14884         case Publication_field_affil_country:
14885         case Publication_field_affil_street:
14886         case Publication_field_affil_email:
14887         case Publication_field_affil_fax:
14888         case Publication_field_affil_phone:
14889         case Publication_field_affil_zipcode:
14890           if (cgp->authors != NULL && cgp->authors->affil != NULL) {
14891             str = GetPubFieldFromAffil (cgp->authors->affil, field, scp);
14892           }
14893           break;
14894         case Publication_field_journal:
14895           str = GetFirstValNodeStringMatch (cgp->journal, scp);
14896           break;
14897         case Publication_field_volume:
14898           if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) {
14899             str = StringSave (cgp->volume);
14900           }
14901           break;
14902         case Publication_field_issue:
14903           if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) {
14904             str = StringSave (cgp->issue);
14905           }
14906           break;
14907         case Publication_field_pages:
14908           if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) {
14909             str = StringSave (cgp->pages);
14910           }
14911           break;
14912         case Publication_field_date:
14913           if (cgp->date != NULL) {
14914             str = PrintPartialOrCompleteDate (cgp->date);
14915             if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
14916               str = MemFree (str);
14917             }
14918           }
14919           break;
14920         case Publication_field_serial_number:
14921           str = GetInt2ValueFromString (cgp->serial_number, scp);
14922           break;
14923         case Publication_field_title:
14924           if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) {
14925             str = StringSave (cgp->title);
14926           }
14927           break;
14928       }
14929       break;
14930     case PUB_Sub :
14931       csp = (CitSubPtr) the_pub->data.ptrvalue;
14932       switch (field) {
14933         case Publication_field_title:
14934           if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) {
14935             str = StringSave (csp->descr);
14936           }
14937           break;
14938         case Publication_field_authors:
14939           str = GetAuthorListString (csp->authors, scp);
14940           break;
14941         case Publication_field_authors_initials:
14942           str = GetAuthorListStringEx (csp->authors, scp, TRUE);
14943           break;
14944         case Publication_field_affiliation:
14945         case Publication_field_affil_div:
14946         case Publication_field_affil_city:
14947         case Publication_field_affil_sub:
14948         case Publication_field_affil_country:
14949         case Publication_field_affil_street:
14950         case Publication_field_affil_email:
14951         case Publication_field_affil_fax:
14952         case Publication_field_affil_phone:
14953         case Publication_field_affil_zipcode:
14954           if (csp->authors != NULL) {
14955             str = GetPubFieldFromAffil (csp->authors->affil, field, scp);
14956           }
14957           break;
14958         case Publication_field_date:
14959           str = PrintPartialOrCompleteDate (csp->date);
14960           if (StringHasNoText (str) || !DoesStringMatchConstraint (str, scp)) {
14961             str = MemFree (str);
14962           }
14963           break;
14964       }
14965       break;
14966     case PUB_Article :
14967       cap = (CitArtPtr) the_pub->data.ptrvalue;
14968       switch (field) {
14969         case Publication_field_title:
14970           str = GetFirstValNodeStringMatch (cap->title, scp);
14971           break;
14972         case Publication_field_authors:
14973           str = GetAuthorListString (cap->authors, scp);
14974           break;
14975         case Publication_field_authors_initials:
14976           str = GetAuthorListStringEx (cap->authors, scp, TRUE);
14977           break;
14978         case Publication_field_affiliation:
14979         case Publication_field_affil_div:
14980         case Publication_field_affil_city:
14981         case Publication_field_affil_sub:
14982         case Publication_field_affil_country:
14983         case Publication_field_affil_street:
14984         case Publication_field_affil_email:
14985         case Publication_field_affil_fax:
14986         case Publication_field_affil_phone:
14987         case Publication_field_affil_zipcode:
14988           if (cap->authors != NULL) {
14989             str = GetPubFieldFromAffil (cap->authors->affil, field, scp);
14990           }
14991           break;
14992         default:
14993           if (cap->from == 1) {
14994             str = GetPubFieldFromCitJour (cap->fromptr, field, scp);
14995           } else if (cap->from == 2) {
14996             str = GetPubFieldFromCitBook (cap->fromptr, field, scp);
14997           }
14998           break;
14999       }
15000       break;
15001     case PUB_Journal:
15002       cjp = (CitJourPtr) the_pub->data.ptrvalue;
15003       str = GetPubFieldFromCitJour (cjp, field, scp);
15004       break;
15005     case PUB_Book :
15006     case PUB_Man :
15007       cbp = (CitBookPtr) the_pub->data.ptrvalue;
15008       str = GetPubFieldFromCitBook (cbp, field, scp);
15009       break;
15010     case PUB_Patent :
15011       cpp = (CitPatPtr) the_pub->data.ptrvalue;
15012       switch (field) {
15013         case Publication_field_title:
15014           if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) {
15015             str = StringSave (cpp->title);
15016           }
15017           break;
15018         case Publication_field_authors:
15019           str = GetAuthorListString (cpp->authors, scp);
15020           break;
15021         case Publication_field_authors_initials:
15022           str = GetAuthorListStringEx (cpp->authors, scp, TRUE);
15023           break;
15024         case Publication_field_affiliation:
15025         case Publication_field_affil_div:
15026         case Publication_field_affil_city:
15027         case Publication_field_affil_sub:
15028         case Publication_field_affil_country:
15029         case Publication_field_affil_street:
15030         case Publication_field_affil_email:
15031         case Publication_field_affil_fax:
15032         case Publication_field_affil_phone:
15033         case Publication_field_affil_zipcode:
15034           if (cpp->authors != NULL) {
15035             str = GetPubFieldFromAffil (cpp->authors->affil, field, scp);
15036           }
15037           break;
15038       }
15039       break;
15040     case PUB_PMid:
15041       if (field == Publication_field_pmid) {
15042         str = (CharPtr) MemNew (sizeof (Char) * 15);
15043         sprintf (str, "%d", the_pub->data.intvalue);
15044       }
15045       break;
15046     default :
15047       break;
15048   }
15049   return str;
15050 }
15051 
15052 
RemovePMIDOnCitArt(CitArtPtr cap,StringConstraintPtr scp)15053 static Boolean RemovePMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp)
15054 {
15055   Boolean    rval = FALSE;
15056   ValNodePtr vnp, vnp_prev = NULL, vnp_next;
15057 
15058   if (cap == NULL) {
15059     return FALSE;
15060   }
15061 
15062   for (vnp = cap->ids; vnp != NULL; vnp = vnp_next) {
15063     vnp_next = vnp->next;
15064     if (vnp->choice == ARTICLEID_PUBMED && DoesNumberMatchStringConstraint (vnp->data.intvalue, scp)) {
15065       if (vnp_prev == NULL) {
15066         cap->ids->next = vnp_next;
15067       } else {
15068         vnp_prev->next = vnp_next;
15069       }
15070       vnp->next = NULL;
15071       vnp = ArticleIdFree (vnp);
15072       rval = TRUE;
15073     } else {
15074       vnp_prev = vnp;
15075     }
15076   }
15077   return rval;
15078 }
15079 
15080 
RemovePubFieldFromPub(PubPtr the_pub,Int4 field,StringConstraintPtr scp)15081 static Boolean RemovePubFieldFromPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp)
15082 {
15083   CitGenPtr    cgp;
15084   CitArtPtr    cap;
15085   CitBookPtr   cbp;
15086   CitPatPtr    cpp;
15087   CitSubPtr    csp;
15088   Boolean      rval = FALSE;
15089   Char         num[15];
15090 
15091   if (the_pub == NULL) return FALSE;
15092 
15093   if (field == Publication_field_pub_class) {
15094     return SetPubclassOnPub(the_pub, "unpublished");
15095   }
15096 
15097   switch (the_pub->choice) {
15098     case PUB_Gen :
15099       cgp = (CitGenPtr) the_pub->data.ptrvalue;
15100       switch (field) {
15101         case Publication_field_cit:
15102           if (!StringHasNoText (cgp->cit) && DoesStringMatchConstraint (cgp->title, scp)) {
15103             cgp->cit = MemFree (cgp->cit);
15104             rval = TRUE;
15105           }
15106           break;
15107         case Publication_field_authors:
15108           rval = RemoveAuthorListString (cgp->authors, scp);
15109           break;
15110         case Publication_field_affiliation:
15111         case Publication_field_affil_div:
15112         case Publication_field_affil_city:
15113         case Publication_field_affil_sub:
15114         case Publication_field_affil_country:
15115         case Publication_field_affil_street:
15116         case Publication_field_affil_email:
15117         case Publication_field_affil_fax:
15118         case Publication_field_affil_phone:
15119         case Publication_field_affil_zipcode:
15120           if (cgp->authors != NULL) {
15121             rval = RemovePubFieldFromAffil(cgp->authors->affil, field, scp);
15122           }
15123           break;
15124         case Publication_field_journal:
15125           rval = RemoveValNodeStringMatch (&(cgp->journal), scp);
15126           break;
15127         case Publication_field_volume:
15128           if (!StringHasNoText (cgp->volume) && DoesStringMatchConstraint (cgp->volume, scp)) {
15129             cgp->volume = MemFree (cgp->volume);
15130             rval = TRUE;
15131           }
15132           break;
15133         case Publication_field_issue:
15134           if (!StringHasNoText (cgp->issue) && DoesStringMatchConstraint (cgp->issue, scp)) {
15135             cgp->issue = MemFree (cgp->issue);
15136             rval = TRUE;
15137           }
15138           break;
15139         case Publication_field_pages:
15140           if (!StringHasNoText (cgp->pages) && DoesStringMatchConstraint (cgp->pages, scp)) {
15141             cgp->pages = MemFree (cgp->pages);
15142             rval = TRUE;
15143           }
15144           break;
15145         case Publication_field_date:
15146           rval = RemovePubDate (&(cgp->date), scp);
15147           break;
15148         case Publication_field_serial_number:
15149           if (cgp->serial_number > 0) {
15150             sprintf (num, "%d", cgp->serial_number);
15151             if (DoesStringMatchConstraint (num, scp)) {
15152               cgp->serial_number = 0;
15153               rval = TRUE;
15154             }
15155           }
15156           break;
15157         case Publication_field_title:
15158           if (!StringHasNoText (cgp->title) && DoesStringMatchConstraint (cgp->title, scp)) {
15159             cgp->title = MemFree (cgp->title);
15160             rval = TRUE;
15161           }
15162           break;
15163       }
15164       break;
15165     case PUB_Sub :
15166       csp = (CitSubPtr) the_pub->data.ptrvalue;
15167       switch (field) {
15168         case Publication_field_title:
15169           if (!StringHasNoText (csp->descr) && DoesStringMatchConstraint (csp->descr, scp)) {
15170             csp->descr = MemFree (csp->descr);
15171             rval = TRUE;
15172           }
15173           break;
15174         case Publication_field_authors:
15175           rval = RemoveAuthorListString (csp->authors, scp);
15176           break;
15177         case Publication_field_affiliation:
15178         case Publication_field_affil_div:
15179         case Publication_field_affil_city:
15180         case Publication_field_affil_sub:
15181         case Publication_field_affil_country:
15182         case Publication_field_affil_street:
15183         case Publication_field_affil_email:
15184         case Publication_field_affil_fax:
15185         case Publication_field_affil_phone:
15186         case Publication_field_affil_zipcode:
15187           if (csp->authors != NULL) {
15188             rval = RemovePubFieldFromAffil(csp->authors->affil, field, scp);
15189           }
15190           break;
15191         case Publication_field_date:
15192           rval = RemovePubDate (&(csp->date), scp);
15193           break;
15194       }
15195       break;
15196     case PUB_Article :
15197       cap = (CitArtPtr) the_pub->data.ptrvalue;
15198       switch (field) {
15199         case Publication_field_pmid:
15200           rval = RemovePMIDOnCitArt (cap, scp);
15201           break;
15202         case Publication_field_title:
15203           rval = RemoveValNodeStringMatch (&(cap->title), scp);
15204           break;
15205         case Publication_field_authors:
15206           rval = RemoveAuthorListString (cap->authors, scp);
15207           break;
15208         case Publication_field_affiliation:
15209         case Publication_field_affil_div:
15210         case Publication_field_affil_city:
15211         case Publication_field_affil_sub:
15212         case Publication_field_affil_country:
15213         case Publication_field_affil_street:
15214         case Publication_field_affil_email:
15215         case Publication_field_affil_fax:
15216         case Publication_field_affil_phone:
15217         case Publication_field_affil_zipcode:
15218           if (cap->authors != NULL) {
15219             rval = RemovePubFieldFromAffil(cap->authors->affil, field, scp);
15220           }
15221           break;
15222         default:
15223           if (cap->from == 1) {
15224             rval = RemovePubFieldFromCitJour (cap->fromptr, field, scp);
15225           } else if (cap->from == 2) {
15226             rval = RemovePubFieldFromCitBook (cap->fromptr, field, scp);
15227           }
15228           break;
15229       }
15230       break;
15231     case PUB_Journal:
15232       rval = RemovePubFieldFromCitJour (the_pub->data.ptrvalue, field, scp);
15233       break;
15234     case PUB_Book :
15235     case PUB_Man :
15236       cbp = (CitBookPtr) the_pub->data.ptrvalue;
15237       rval = RemovePubFieldFromCitBook (cbp, field, scp);
15238       break;
15239     case PUB_Patent :
15240       cpp = (CitPatPtr) the_pub->data.ptrvalue;
15241       switch (field) {
15242         case Publication_field_title:
15243           if (!StringHasNoText (cpp->title) && DoesStringMatchConstraint (cpp->title, scp)) {
15244             cpp->title = MemFree (cpp->title);
15245             rval = TRUE;
15246           }
15247           break;
15248         case Publication_field_authors:
15249           rval = RemoveAuthorListString (cpp->authors, scp);
15250           break;
15251         case Publication_field_affiliation:
15252         case Publication_field_affil_div:
15253         case Publication_field_affil_city:
15254         case Publication_field_affil_sub:
15255         case Publication_field_affil_country:
15256         case Publication_field_affil_street:
15257         case Publication_field_affil_email:
15258         case Publication_field_affil_fax:
15259         case Publication_field_affil_phone:
15260         case Publication_field_affil_zipcode:
15261           if (cpp->authors != NULL) {
15262             rval = RemovePubFieldFromAffil(cpp->authors->affil, field, scp);
15263           }
15264           break;
15265       }
15266       break;
15267     case PUB_PMid:
15268       if (field == Publication_field_pmid) {
15269         the_pub->data.intvalue = 0;
15270       }
15271       break;
15272     default :
15273       break;
15274   }
15275   return rval;
15276 }
15277 
15278 
SetPMIDOnCitArt(CitArtPtr cap,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)15279 static Boolean SetPMIDOnCitArt (CitArtPtr cap, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
15280 {
15281   Boolean found = FALSE, rval = FALSE;
15282   ValNodePtr vnp;
15283 
15284   if (cap == NULL || !StringIsAllDigits(value)) {
15285     return FALSE;
15286   }
15287 
15288   for (vnp = cap->ids; vnp != NULL; vnp = vnp->next) {
15289     if (vnp->choice == ARTICLEID_PUBMED) {
15290       found = TRUE;
15291       if (existing_text == ExistingTextOption_replace_old && DoesNumberMatchStringConstraint(vnp->data.intvalue, scp)) {
15292         vnp->data.intvalue = atoi (value);
15293         rval = TRUE;
15294       }
15295     }
15296   }
15297   if (!found && IsStringConstraintEmpty (scp)) {
15298     ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, atoi (value));
15299     rval = TRUE;
15300   }
15301   return rval;
15302 }
15303 
15304 
SetPubFieldOnPub(PubPtr the_pub,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)15305 static Boolean SetPubFieldOnPub (PubPtr the_pub, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
15306 {
15307   CitGenPtr    cgp;
15308   CitArtPtr    cap;
15309   CitBookPtr   cbp;
15310   CitPatPtr    cpp;
15311   CitSubPtr    csp;
15312   Boolean      rval = FALSE;
15313 
15314   if (the_pub == NULL || value == NULL) return FALSE;
15315 
15316   if (field == Publication_field_pub_class) {
15317     return SetPubclassOnPub(the_pub, value);
15318   }
15319 
15320   switch (the_pub->choice) {
15321     case PUB_Gen :
15322       cgp = (CitGenPtr) the_pub->data.ptrvalue;
15323       switch (field) {
15324         case Publication_field_cit:
15325           if (DoesStringMatchConstraint (cgp->cit, scp)) {
15326             rval = SetStringValue ( &(cgp->cit), value, existing_text);
15327           }
15328           break;
15329         case Publication_field_authors:
15330           if (cgp->authors == NULL) {
15331             cgp->authors = AuthListNew();
15332           }
15333           rval = SetAuthorListFromString (cgp->authors, scp, value, existing_text);
15334           break;
15335         case Publication_field_affiliation:
15336         case Publication_field_affil_div:
15337         case Publication_field_affil_city:
15338         case Publication_field_affil_sub:
15339         case Publication_field_affil_country:
15340         case Publication_field_affil_street:
15341         case Publication_field_affil_email:
15342         case Publication_field_affil_fax:
15343         case Publication_field_affil_phone:
15344         case Publication_field_affil_zipcode:
15345           if (cgp->authors == NULL) {
15346             cgp->authors = AuthListNew();
15347           }
15348           if (cgp->authors->affil == NULL) {
15349             cgp->authors->affil = AffilNew();
15350           }
15351           rval = SetAffilPubField (cgp->authors->affil, field, scp, value, existing_text);
15352           break;
15353         case Publication_field_journal:
15354           rval = SetStringsInValNodeStringList (&(cgp->journal), scp, value, existing_text);
15355           SetValNodeChoices (cgp->journal, 1);
15356           break;
15357         case Publication_field_volume:
15358           if (DoesStringMatchConstraint (cgp->volume, scp)) {
15359             rval = SetStringValue ( &(cgp->volume), value, existing_text);
15360           }
15361           break;
15362         case Publication_field_issue:
15363           if (DoesStringMatchConstraint (cgp->issue, scp)) {
15364             rval = SetStringValue ( &(cgp->issue), value, existing_text);
15365           }
15366           break;
15367         case Publication_field_pages:
15368           if (DoesStringMatchConstraint (cgp->pages, scp)) {
15369             rval = SetStringValue ( &(cgp->pages), value, existing_text);
15370           }
15371           break;
15372         case Publication_field_date:
15373           rval = SetPubDate (&(cgp->date), scp, value, existing_text);
15374           break;
15375         case Publication_field_serial_number:
15376           rval = SetInt2ValueWithString (&(cgp->serial_number), value, existing_text);
15377           break;
15378         case Publication_field_title:
15379           if (DoesStringMatchConstraint(cgp->title, scp)) {
15380             rval = SetStringValue ( &(cgp->title), value, existing_text);
15381           }
15382           break;
15383       }
15384       break;
15385     case PUB_Sub :
15386       csp = (CitSubPtr) the_pub->data.ptrvalue;
15387       switch (field) {
15388         case Publication_field_title:
15389           if (DoesStringMatchConstraint (csp->descr, scp)) {
15390             rval = SetStringValue (&(csp->descr), value, existing_text);
15391           }
15392           break;
15393         case Publication_field_authors:
15394           if (csp->authors == NULL) {
15395             csp->authors = AuthListNew();
15396           }
15397           rval = SetAuthorListFromString (csp->authors, scp, value, existing_text);
15398           break;
15399         case Publication_field_affiliation:
15400         case Publication_field_affil_div:
15401         case Publication_field_affil_city:
15402         case Publication_field_affil_sub:
15403         case Publication_field_affil_country:
15404         case Publication_field_affil_street:
15405         case Publication_field_affil_email:
15406         case Publication_field_affil_fax:
15407         case Publication_field_affil_phone:
15408         case Publication_field_affil_zipcode:
15409           if (csp->authors == NULL) {
15410             csp->authors = AuthListNew();
15411           }
15412           if (csp->authors->affil == NULL) {
15413             csp->authors->affil = AffilNew();
15414           }
15415           rval = SetAffilPubField (csp->authors->affil, field, scp, value, existing_text);
15416           break;
15417         case Publication_field_date:
15418           rval = SetPubDate (&(csp->date), scp, value, existing_text);
15419           break;
15420       }
15421       break;
15422     case PUB_Article :
15423       cap = (CitArtPtr) the_pub->data.ptrvalue;
15424       switch (field) {
15425         case Publication_field_pmid:
15426           rval = SetPMIDOnCitArt (cap, scp, value, existing_text);
15427           break;
15428         case Publication_field_title:
15429           rval = SetStringsInValNodeStringList (&(cap->title), scp, value, existing_text);
15430           SetValNodeChoices (cap->title, 1);
15431           break;
15432         case Publication_field_authors:
15433           if (cap->authors == NULL) {
15434             cap->authors = AuthListNew();
15435           }
15436           rval = SetAuthorListFromString (cap->authors, scp, value, existing_text);
15437           break;
15438         case Publication_field_affiliation:
15439         case Publication_field_affil_div:
15440         case Publication_field_affil_city:
15441         case Publication_field_affil_sub:
15442         case Publication_field_affil_country:
15443         case Publication_field_affil_street:
15444         case Publication_field_affil_email:
15445         case Publication_field_affil_fax:
15446         case Publication_field_affil_phone:
15447         case Publication_field_affil_zipcode:
15448           if (cap->authors == NULL) {
15449             cap->authors = AuthListNew();
15450           }
15451           if (cap->authors->affil == NULL) {
15452             cap->authors->affil = AffilNew();
15453           }
15454           rval = SetAffilPubField (cap->authors->affil, field, scp, value, existing_text);
15455           break;
15456         default:
15457           if (cap->from == 1) {
15458             rval = SetPubFieldOnCitJour (cap->fromptr, field, scp, value, existing_text);
15459           } else if (cap->from == 2) {
15460             rval = SetPubFieldOnCitBook (cap->fromptr, field, scp, value, existing_text);
15461           }
15462           break;
15463       }
15464       break;
15465     case PUB_Journal:
15466       rval = SetPubFieldOnCitJour (the_pub->data.ptrvalue, field, scp, value, existing_text);
15467       break;
15468     case PUB_Book :
15469     case PUB_Man :
15470       cbp = (CitBookPtr) the_pub->data.ptrvalue;
15471       rval = SetPubFieldOnCitBook (cbp, field, scp, value, existing_text);
15472       break;
15473     case PUB_Patent :
15474       cpp = (CitPatPtr) the_pub->data.ptrvalue;
15475       switch (field) {
15476         case Publication_field_title:
15477           if (DoesStringMatchConstraint(cpp->title, scp)) {
15478             rval = SetStringValue ( &(cpp->title), value, existing_text);
15479           }
15480           break;
15481         case Publication_field_authors:
15482           if (cpp->authors == NULL) {
15483             cpp->authors = AuthListNew();
15484           }
15485           rval = SetAuthorListFromString (cpp->authors, scp, value, existing_text);
15486           break;
15487         case Publication_field_affiliation:
15488         case Publication_field_affil_div:
15489         case Publication_field_affil_city:
15490         case Publication_field_affil_sub:
15491         case Publication_field_affil_country:
15492         case Publication_field_affil_street:
15493         case Publication_field_affil_email:
15494         case Publication_field_affil_fax:
15495         case Publication_field_affil_phone:
15496         case Publication_field_affil_zipcode:
15497           if (cpp->authors == NULL) {
15498             cpp->authors = AuthListNew();
15499           }
15500           if (cpp->authors->affil == NULL) {
15501             cpp->authors->affil = AffilNew();
15502           }
15503           rval = SetAffilPubField (cpp->authors->affil, field, scp, value, existing_text);
15504           break;
15505       }
15506       break;
15507     case PUB_PMid:
15508       if (field == Publication_field_pmid && StringIsAllDigits (value) && DoesNumberMatchStringConstraint(the_pub->data.intvalue, scp)
15509           && existing_text == ExistingTextOption_replace_old) {
15510         the_pub->data.intvalue = atoi (value);
15511         rval = TRUE;
15512       }
15513       break;
15514     default :
15515       break;
15516   }
15517   return rval;
15518 }
15519 
15520 
15521 
GetPubFieldFromObject(Uint1 choice,Pointer data,Int4 field,StringConstraintPtr scp)15522 static CharPtr GetPubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp)
15523 {
15524   CharPtr rval = NULL;
15525   PubdescPtr pdp = NULL;
15526   PubPtr     pub;
15527   SeqFeatPtr sfp;
15528   SeqDescrPtr sdp;
15529 
15530   if (data == NULL) return NULL;
15531   if (choice == OBJ_SEQFEAT) {
15532     sfp = (SeqFeatPtr) data;
15533     if (sfp->data.choice == SEQFEAT_PUB) {
15534       pdp = sfp->data.value.ptrvalue;
15535     }
15536   } else if (choice == OBJ_SEQDESC) {
15537     sdp = (SeqDescrPtr) data;
15538     if (sdp->choice == Seq_descr_pub) {
15539       pdp = sdp->data.ptrvalue;
15540     }
15541   }
15542 
15543   if (pdp == NULL) return NULL;
15544   for (pub = pdp->pub; pub != NULL && rval == NULL; pub = pub->next) {
15545     rval = GetPubFieldFromPub (pub, field, scp);
15546   }
15547 
15548   return rval;
15549 }
15550 
15551 
RemovePubFieldFromObject(Uint1 choice,Pointer data,Int4 field,StringConstraintPtr scp)15552 static Boolean RemovePubFieldFromObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp)
15553 {
15554   Boolean    rval = FALSE;
15555   PubdescPtr pdp = NULL;
15556   PubPtr     pub, pub_prev = NULL, pub_next;
15557   SeqFeatPtr sfp;
15558   SeqDescrPtr sdp;
15559 
15560   if (data == NULL) return FALSE;
15561   if (choice == OBJ_SEQFEAT) {
15562     sfp = (SeqFeatPtr) data;
15563     if (sfp->data.choice == SEQFEAT_PUB) {
15564       pdp = sfp->data.value.ptrvalue;
15565     }
15566   } else if (choice == OBJ_SEQDESC) {
15567     sdp = (SeqDescrPtr) data;
15568     if (sdp->choice == Seq_descr_pub) {
15569       pdp = sdp->data.ptrvalue;
15570     }
15571   }
15572 
15573   if (pdp == NULL) return FALSE;
15574 
15575   pub = pdp->pub;
15576   while (pub != NULL) {
15577     pub_next = pub->next;
15578     rval |= RemovePubFieldFromPub (pub, field, scp);
15579     if (field == Publication_field_pmid && pub->choice == PUB_PMid && pub->data.intvalue == 0) {
15580       if (pub_prev == NULL) {
15581         pdp->pub = pub_next;
15582       } else {
15583         pub_prev->next = pub_next;
15584       }
15585       pub->next = NULL;
15586       pub = PubFree (pub);
15587     } else {
15588       pub_prev = pub;
15589     }
15590     pub = pub->next;
15591   }
15592   return rval;
15593 }
15594 
15595 
SetPubFieldOnObject(Uint1 choice,Pointer data,Int4 field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)15596 static Boolean SetPubFieldOnObject (Uint1 choice, Pointer data, Int4 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
15597 {
15598   Boolean    rval = FALSE;
15599   PubdescPtr pdp = NULL;
15600   PubPtr     pub;
15601   SeqFeatPtr sfp;
15602   SeqDescrPtr sdp = NULL;
15603 
15604   if (data == NULL) return FALSE;
15605   if (choice == OBJ_SEQFEAT) {
15606     sfp = (SeqFeatPtr) data;
15607     if (sfp->data.choice == SEQFEAT_PUB) {
15608       pdp = sfp->data.value.ptrvalue;
15609     }
15610   } else if (choice == OBJ_SEQDESC) {
15611     sdp = (SeqDescrPtr) data;
15612     if (sdp->choice == Seq_descr_pub) {
15613       pdp = sdp->data.ptrvalue;
15614     }
15615   }
15616 
15617   if (pdp == NULL) return FALSE;
15618 
15619   for (pub = pdp->pub; pub != NULL; pub = pub->next) {
15620     rval |= SetPubFieldOnPub (pub, field, scp, value, existing_text);
15621   }
15622   if (!rval && field == Publication_field_pmid && IsStringConstraintEmpty (scp) && StringIsAllDigits(value)) {
15623     /* first, set pub class to published for pre-existing pub */
15624     if (pdp->pub != NULL && pdp->pub->choice == PUB_Gen) {
15625         SetPubclassOnPub(pdp->pub, "journal");
15626     }
15627     ValNodeAddInt (&pdp->pub, PUB_PMid, atoi (value));
15628   }
15629   return rval;
15630 }
15631 
15632 
FieldTypeFromAECRAction(AECRActionPtr action)15633 NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action)
15634 {
15635   Uint1 field_type = 0;
15636   ApplyActionPtr a;
15637   EditActionPtr  e;
15638   ConvertActionPtr v;
15639   CopyActionPtr c;
15640   SwapActionPtr s;
15641   RemoveActionPtr r;
15642   AECRParseActionPtr p;
15643   RemoveOutsideActionPtr ro;
15644 
15645   if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) {
15646     return 0;
15647   }
15648   switch (action->action->choice) {
15649     case ActionChoice_apply:
15650       a = (ApplyActionPtr) action->action->data.ptrvalue;
15651       if (a->field != NULL) {
15652         field_type = a->field->choice;
15653       }
15654       break;
15655     case ActionChoice_edit:
15656       e = (EditActionPtr) action->action->data.ptrvalue;
15657       if (e->field != NULL) {
15658         field_type = e->field->choice;
15659       }
15660       break;
15661     case ActionChoice_remove_outside:
15662       ro = (RemoveOutsideActionPtr) action->action->data.ptrvalue;
15663       if (ro != NULL && ro->field != NULL) {
15664         field_type = ro->field->choice;
15665       }
15666       break;
15667     case ActionChoice_convert:
15668       v = (ConvertActionPtr) action->action->data.ptrvalue;
15669       if (v->fields != NULL) {
15670         field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice);
15671       }
15672       break;
15673     case ActionChoice_copy:
15674       c = (CopyActionPtr) action->action->data.ptrvalue;
15675       if (c->fields != NULL) {
15676         field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice);
15677       }
15678       break;
15679     case ActionChoice_swap:
15680       s = (SwapActionPtr) action->action->data.ptrvalue;
15681       if (s->fields != NULL) {
15682         field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice);
15683       }
15684       break;
15685     case ActionChoice_remove:
15686       r = (RemoveActionPtr) action->action->data.ptrvalue;
15687       if (r->field != NULL) {
15688         field_type = r->field->choice;
15689       }
15690       break;
15691     case ActionChoice_parse:
15692       p = (AECRParseActionPtr) action->action->data.ptrvalue;
15693       if (p->fields != NULL) {
15694         field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice);
15695       }
15696       break;
15697   }
15698   return field_type;
15699 }
15700 
/*
 * PubSerialNumberData
 *
 * One entry of the citation list built by CaptureRefBlockSerialNumbers:
 * it ties a REFERENCE serial number to the Bioseq and the minimized
 * publication it was captured for.
 */
typedef struct pubserialnumber {
  BioseqPtr bsp;            /* Bioseq returned by GetSequenceForObject for the pub feature/descriptor; not freed by PubSerialNumberFree */
  Int4      serial_number;  /* number parsed from the text following "REFERENCE" */
  ValNodePtr min_pub;       /* PUB_Equiv node wrapping the MinimizePub result; released with PubSetFree in PubSerialNumberFree */
} PubSerialNumberData, PNTR PubSerialNumberPtr;
15706 
15707 
PubSerialNumberNew()15708 static PubSerialNumberPtr PubSerialNumberNew ()
15709 {
15710   PubSerialNumberPtr psn;
15711 
15712   psn = (PubSerialNumberPtr) MemNew (sizeof (PubSerialNumberData));
15713   psn->bsp = NULL;
15714   psn->serial_number = 0;
15715   psn->min_pub = NULL;
15716 
15717   return psn;
15718 }
15719 
15720 
/*
 * PubSerialNumberFree
 *
 * Releases a PubSerialNumberData: the min_pub member is handed to
 * PubSetFree and the structure itself to MemFree.  The bsp member is left
 * untouched.  A NULL argument is accepted.
 *
 * Returns the result of MemFree (or NULL for a NULL argument) so callers
 * can clear their pointer in one statement.
 */
static PubSerialNumberPtr PubSerialNumberFree (PubSerialNumberPtr psn)
{
  if (psn == NULL) {
    return NULL;
  }

  psn->min_pub = PubSetFree (psn->min_pub);
  return (PubSerialNumberPtr) MemFree (psn);
}
15729 
15730 
PubSerialNumberListFree(ValNodePtr vnp)15731 NLM_EXTERN ValNodePtr PubSerialNumberListFree (ValNodePtr vnp)
15732 {
15733   ValNodePtr vnp_next;
15734 
15735   while (vnp != NULL) {
15736     vnp_next = vnp->next;
15737     vnp->next = NULL;
15738     vnp->data.ptrvalue = PubSerialNumberFree (vnp->data.ptrvalue);
15739     vnp = ValNodeFree (vnp);
15740     vnp = vnp_next;
15741   }
15742   return vnp;
15743 }
15744 
15745 
/*
 * CaptureRefBlockSerialNumbers
 *
 * Callback installed as XtraBlock.ffwrite by GetCitListsForSeqEntry.  For
 * each REFERENCE block it parses the serial number that follows the word
 * "REFERENCE", locates the publication feature or descriptor identified by
 * entityID/itemtype/itemID, and appends a PubSerialNumberData (Bioseq,
 * serial number, minimized pub) to the list addressed by userdata.
 *
 * str       - text of the block; must begin with "REFERENCE" (case-insensitive)
 * userdata  - address of the ValNodePtr list head to append to
 * blocktype - only REFERENCE_BLOCK is processed
 * entityID, itemtype, itemID - identify the source feature/descriptor
 * left, right - unused
 *
 * Nothing is recorded when the block is not a reference, no number follows
 * the keyword, or the publication or its Bioseq cannot be found.
 */
static void CaptureRefBlockSerialNumbers
(CharPtr str,
 Pointer userdata,
 BlockType blocktype,
 Uint2 entityID,
 Uint2 itemtype,
 Uint4 itemID,
 Int4 left,
 Int4 right
)
{
  CharPtr          cp;
  Int4             serial_number;
  ValNodePtr       vnp;
  BioseqPtr        bsp = NULL;
  SeqFeatPtr       sfp;
  SeqDescrPtr      sdp;
  SeqMgrFeatContext fcontext;
  SeqMgrDescContext dcontext;
  PubSerialNumberPtr psn;
  ValNodePtr        ppr = NULL;
  PubdescPtr        pdp = NULL;

  if (blocktype != REFERENCE_BLOCK || userdata == NULL) return;
  if (StringNICmp (str, "REFERENCE", 9) != 0) {
    return;
  }

  /* skip the keyword and any whitespace; the cast keeps the <ctype.h>
   * calls defined for characters with the high bit set */
  cp = str + 9;
  while (isspace ((unsigned char) *cp)) {
    cp++;
  }
  if (!isdigit ((unsigned char) *cp)) {
    return;
  }
  serial_number = atoi (cp);

  if (itemtype == OBJ_SEQFEAT) {
    sfp = SeqMgrGetDesiredFeature (entityID, NULL, itemID, 0, NULL, &fcontext);
    if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
      pdp = (PubdescPtr) sfp->data.value.ptrvalue;
      bsp = GetSequenceForObject (OBJ_SEQFEAT, sfp);
    }
  } else if (itemtype == OBJ_SEQDESC) {
    sdp = SeqMgrGetDesiredDescriptor (entityID, NULL, itemID, 0, NULL, &dcontext);
    if (sdp != NULL && sdp->choice == Seq_descr_pub) {
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      bsp = GetSequenceForObject (OBJ_SEQDESC, sdp);
    }
  }
  if (pdp != NULL && bsp != NULL) {
    /* temporary PUB_Equiv wrapper around the original pub; only the
     * wrapper node is freed afterwards, pdp->pub stays with its owner */
    vnp = ValNodeNew (NULL);
    if (vnp != NULL) {
      vnp->choice = PUB_Equiv;
      vnp->data.ptrvalue = pdp->pub;
      ppr = MinimizePub (vnp);
      ValNodeFree (vnp);
    }
    /* wrapper that is stored in the list entry */
    vnp = ValNodeNew (NULL);
    if (vnp != NULL) {
      vnp->choice = PUB_Equiv;
      vnp->data.ptrvalue = ppr;

      psn = PubSerialNumberNew ();
      if (psn == NULL) {
        /* could not allocate the entry: release the minimized pub the
         * same way PubSerialNumberFree would have */
        vnp = PubSetFree (vnp);
      } else {
        psn->bsp = bsp;
        psn->serial_number = serial_number;
        psn->min_pub = vnp;
        ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, psn);
      }
    }
  }
}
15816 
15817 
GetCitListsForSeqEntry(SeqEntryPtr sep)15818 NLM_EXTERN ValNodePtr GetCitListsForSeqEntry (SeqEntryPtr sep)
15819 {
15820   XtraBlock       xtra;
15821   ValNodePtr      head = NULL;
15822   ErrSev          level;
15823   Boolean         okay;
15824   SeqEntryPtr     oldscope;
15825   Uint2           entityID;
15826 
15827   if (sep == NULL) return NULL;
15828 
15829   MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
15830   xtra.ffwrite = CaptureRefBlockSerialNumbers;
15831   xtra.userdata = (Pointer) &head;
15832   level = ErrSetMessageLevel (SEV_MAX);
15833   oldscope = SeqEntrySetScope (sep);
15834   okay = SeqEntryToGnbk (sep, NULL, GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE,
15835                          SHOW_CONTIG_FEATURES, 0, 0, &xtra, NULL);
15836   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
15837   SeqEntrySetScope (oldscope);
15838   ErrSetMessageLevel (level);
15839   return head;
15840 }
15841 
15842 
GetCitationNumberForMinPub(BioseqPtr bsp,ValNodePtr min_pub,ValNodePtr pub_list)15843 NLM_EXTERN Int4 GetCitationNumberForMinPub (BioseqPtr bsp, ValNodePtr min_pub, ValNodePtr pub_list)
15844 {
15845   Int4 rval = -1;
15846   PubSerialNumberPtr psn;
15847   ValNodePtr vnp, tmp;
15848 
15849   if (bsp == NULL || min_pub == NULL || pub_list == NULL) {
15850     return -1;
15851   }
15852 
15853   tmp = ValNodeNew (NULL);
15854   tmp->choice = PUB_Equiv;
15855   tmp->data.ptrvalue = min_pub;
15856 
15857   for (vnp = pub_list; vnp != NULL && rval == -1; vnp = vnp->next) {
15858     psn = (PubSerialNumberPtr) vnp->data.ptrvalue;
15859     if (psn->bsp == bsp) {
15860       if (PubLabelMatch (tmp, psn->min_pub) == 0) {
15861         rval = psn->serial_number;
15862       }
15863     }
15864   }
15865 
15866   tmp = ValNodeFree (tmp);
15867 
15868   return rval;
15869 }
15870 
15871 
GetMinPubForCitationNumber(BioseqPtr bsp,Int4 number,ValNodePtr pub_list)15872 NLM_EXTERN ValNodePtr GetMinPubForCitationNumber (BioseqPtr bsp, Int4 number, ValNodePtr pub_list)
15873 {
15874   ValNodePtr rval = NULL;
15875   PubSerialNumberPtr psn;
15876   ValNodePtr vnp;
15877 
15878   if (bsp == NULL || number < 0 || pub_list == NULL) {
15879     return NULL;
15880   }
15881 
15882   for (vnp = pub_list; vnp != NULL && rval == NULL; vnp = vnp->next) {
15883     psn = (PubSerialNumberPtr) vnp->data.ptrvalue;
15884     if (psn->bsp == bsp && psn->serial_number == number) {
15885       rval = psn->min_pub;
15886     }
15887   }
15888 
15889   return rval;
15890 }
15891 
15892 
15893 /*
15894  * Some batch operations will be faster if information about the entire record is collected once
15895  * and reused.  The BatchExtra structure is where such data belongs.
15896  */
BatchExtraNew()15897 NLM_EXTERN BatchExtraPtr BatchExtraNew ()
15898 {
15899   BatchExtraPtr b;
15900 
15901   b = (BatchExtraPtr) MemNew (sizeof (BatchExtraData));
15902   b->cit_list = NULL;
15903 
15904   return b;
15905 }
15906 
15907 
BatchExtraFree(BatchExtraPtr b)15908 NLM_EXTERN BatchExtraPtr BatchExtraFree (BatchExtraPtr b)
15909 {
15910   if (b != NULL) {
15911     b->cit_list = PubSerialNumberListFree (b->cit_list);
15912 
15913     b = MemFree (b);
15914   }
15915   return b;
15916 }
15917 
15918 
IsCitationField(FieldTypePtr field)15919 static Boolean IsCitationField (FieldTypePtr field)
15920 {
15921   FeatureFieldPtr feature_field;
15922 
15923   if (field != NULL
15924       && field->choice == FieldType_feature_field
15925       && (feature_field = field->data.ptrvalue) != NULL
15926       && feature_field->field != NULL
15927       && ((feature_field->field->choice == FeatQualChoice_legal_qual
15928            && feature_field->field->data.intvalue == Feat_qual_legal_citation)
15929           || (feature_field->field->choice == FeatQualChoice_illegal_qual
15930           && DoesStringMatchConstraint ("citation", feature_field->field->data.ptrvalue)))) {
15931     return TRUE;
15932   } else {
15933     return FALSE;
15934   }
15935 
15936 }
15937 
15938 
/*
 * InitBatchExtraForField
 *
 * Adds to batch_extra whatever record-wide data is needed to process
 * `field` on `sep`.  At present only citation fields need anything: for
 * those, the citation list of sep is appended to batch_extra->cit_list.
 * Does nothing when batch_extra is NULL.
 */
static void InitBatchExtraForField (BatchExtraPtr batch_extra, FieldTypePtr field, SeqEntryPtr sep)
{
  /* citations are only collected when a citation field is involved */
  if (batch_extra != NULL && IsCitationField (field)) {
    ValNodeLink (&(batch_extra->cit_list), GetCitListsForSeqEntry (sep));
  }
}
15949 
15950 
/*
 * InitBatchExtraForAECRAction
 *
 * Prepares batch_extra for running `action` on `sep`: every field the
 * action refers to (per GetFieldTypeListFromAECRAction) is passed to
 * InitBatchExtraForField.  Does nothing when batch_extra or action is NULL.
 */
static void InitBatchExtraForAECRAction (BatchExtraPtr batch_extra, AECRActionPtr action, SeqEntryPtr sep)
{
  ValNodePtr fields;
  ValNodePtr cur;

  if (batch_extra == NULL || action == NULL) {
    return;
  }

  fields = GetFieldTypeListFromAECRAction (action);
  cur = fields;
  while (cur != NULL) {
    InitBatchExtraForField (batch_extra, cur, sep);
    cur = cur->next;
  }
  FieldTypeListFree (fields);
}
15965 
15966 
SortVnpByObject(VoidPtr ptr1,VoidPtr ptr2)15967 NLM_EXTERN int LIBCALLBACK SortVnpByObject (VoidPtr ptr1, VoidPtr ptr2)
15968 
15969 {
15970   ValNodePtr  vnp1;
15971   ValNodePtr  vnp2;
15972   CharPtr     str1, str2;
15973   int         rval = 0;
15974 
15975   if (ptr1 != NULL && ptr2 != NULL) {
15976     vnp1 = *((ValNodePtr PNTR) ptr1);
15977     vnp2 = *((ValNodePtr PNTR) ptr2);
15978     if (vnp1 != NULL && vnp2 != NULL) {
15979       if (vnp1->choice < vnp2->choice) {
15980         rval = -1;
15981       } else if (vnp1->choice > vnp2->choice) {
15982         rval = 1;
15983       } else {
15984         str1 = GetDiscrepancyItemText (vnp1);
15985         str2 = GetDiscrepancyItemText (vnp2);
15986         rval = StringCmp (str1, str1);
15987         str1 = MemFree (str1);
15988         str2 = MemFree (str2);
15989       }
15990     }
15991   }
15992 
15993   return rval;
15994 }
15995 
15996 
/*
 * BioseqListForObjectList
 *
 * Builds the list of Bioseqs that the objects in object_list belong to.
 * Each object is mapped with GetSequenceForObject; objects without a
 * Bioseq are skipped.  The result holds OBJ_BIOSEQ nodes, sorted and then
 * de-duplicated with SortVnpByObject.
 *
 * The caller frees the returned nodes (ValNodeFree); the Bioseqs they
 * point to are not owned by the list.
 */
static ValNodePtr BioseqListForObjectList (ValNodePtr object_list)
{
  ValNodePtr item;
  ValNodePtr result = NULL;
  BioseqPtr  owner;

  item = object_list;
  while (item != NULL) {
    owner = GetSequenceForObject (item->choice, item->data.ptrvalue);
    if (owner != NULL) {
      ValNodeAddPointer (&result, OBJ_BIOSEQ, owner);
    }
    item = item->next;
  }

  result = ValNodeSort (result, SortVnpByObject);
  ValNodeUnique (&result, SortVnpByObject, ValNodeFree);

  return result;
}
16012 
16013 
/*
 * InitBatchExtraForAECRActionAndObjectList
 *
 * Prepares batch_extra for running `action` on the objects in
 * object_list.  For every Bioseq those objects belong to (see
 * BioseqListForObjectList), the SeqEntry obtained from
 * SeqMgrGetSeqEntryForData is combined with each field of the action and
 * passed to InitBatchExtraForField.
 *
 * Does nothing when batch_extra or action is NULL.
 */
static void InitBatchExtraForAECRActionAndObjectList (BatchExtraPtr batch_extra, AECRActionPtr action, ValNodePtr object_list)
{
  ValNodePtr  fields;
  ValNodePtr  bioseqs;
  ValNodePtr  cur_bsp;
  ValNodePtr  cur_field;
  SeqEntryPtr entry;

  if (batch_extra == NULL || action == NULL) {
    return;
  }

  fields = GetFieldTypeListFromAECRAction (action);
  bioseqs = BioseqListForObjectList (object_list);

  cur_bsp = bioseqs;
  while (cur_bsp != NULL) {
    entry = SeqMgrGetSeqEntryForData (cur_bsp->data.ptrvalue);
    cur_field = fields;
    while (cur_field != NULL) {
      InitBatchExtraForField (batch_extra, cur_field, entry);
      cur_field = cur_field->next;
    }
    cur_bsp = cur_bsp->next;
  }

  /* only the list nodes are released; the Bioseqs are not owned here */
  ValNodeFree (bioseqs);
  FieldTypeListFree (fields);
}
16037 
16038 
16039 
16040 
16041 
GetFieldValueForObjectEx(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp,BatchExtraPtr batch_extra)16042 NLM_EXTERN CharPtr GetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
16043 {
16044   CharPtr str = NULL;
16045   FeatureFieldPtr feature_field;
16046   SeqDescrPtr     sdp;
16047   GBBlockPtr      gb;
16048   SeqMgrDescContext context;
16049 
16050   if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL;
16051 
16052   switch (field->choice) {
16053     case FieldType_source_qual :
16054       str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
16055       break;
16056     case FieldType_feature_field :
16057       if (choice == OBJ_SEQFEAT) {
16058         str = GetQualFromFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, batch_extra);
16059       }
16060       break;
16061     case FieldType_cds_gene_prot :
16062       if (choice == 0) {
16063         str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
16064       } else if (choice == OBJ_SEQFEAT) {
16065         feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
16066         str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
16067         feature_field = FeatureFieldFree (feature_field);
16068       }
16069       break;
16070     case FieldType_molinfo_field :
16071       if (choice == OBJ_BIOSEQ) {
16072         str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
16073       }
16074       break;
16075     case FieldType_pub :
16076       str = GetPubFieldFromObject (choice, data, field->data.intvalue, scp);
16077       break;
16078     case FieldType_rna_field :
16079       if (choice == OBJ_SEQFEAT) {
16080         str = GetRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, NULL);
16081       }
16082       break;
16083     case FieldType_struc_comment_field:
16084       if (choice == OBJ_SEQDESC && data != NULL) {
16085         sdp = (SeqDescrPtr) data;
16086         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16087           str = GetStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp);
16088         }
16089       }
16090       break;
16091     case FieldType_dblink:
16092       if (choice == OBJ_SEQDESC && data != NULL) {
16093         sdp = (SeqDescrPtr) data;
16094         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16095           str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
16096         }
16097       } else if (choice == OBJ_BIOSEQ) {
16098         for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_user, &context);
16099              sdp != NULL && str == NULL;
16100              sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_user, &context)) {
16101           str = GetDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
16102         }
16103       }
16104       break;
16105     case FieldType_misc:
16106       if (choice == OBJ_BIOSEQ) {
16107         if (field->data.intvalue == Misc_field_genome_project_id) {
16108           str = GetGenomeProjectIdFromBioseq ((BioseqPtr) data, scp);
16109         } else if (field->data.intvalue == Misc_field_comment_descriptor) {
16110           str = NULL;
16111           for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_comment, &context);
16112                sdp != NULL && str == NULL;
16113                sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_comment, &context)) {
16114             if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) {
16115               str = StringSave (sdp->data.ptrvalue);
16116             }
16117           }
16118         } else if (field->data.intvalue == Misc_field_defline) {
16119           str = NULL;
16120           for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_title, &context);
16121                sdp != NULL && str == NULL;
16122                sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_title, &context)) {
16123             if (DoesStringMatchConstraint (sdp->data.ptrvalue, scp)) {
16124               str = StringSave (sdp->data.ptrvalue);
16125             }
16126           }
16127         } else if (field->data.intvalue == Misc_field_keyword) {
16128           str = NULL;
16129           for (sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, NULL, Seq_descr_genbank, &context);
16130                sdp != NULL && str == NULL;
16131                sdp = SeqMgrGetNextDescriptor ((BioseqPtr) data, sdp, Seq_descr_genbank, &context)) {
16132             gb = (GBBlockPtr) sdp->data.ptrvalue;
16133             str = GetFirstValNodeStringMatch (gb->keywords, scp);
16134           }
16135         }
16136       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) {
16137         sdp = (SeqDescrPtr) data;
16138         if (sdp != NULL && sdp->choice == Seq_descr_comment && !StringHasNoText (sdp->data.ptrvalue)) {
16139           str = StringSave (sdp->data.ptrvalue);
16140         }
16141       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) {
16142         sdp = (SeqDescrPtr) data;
16143         if (sdp != NULL && sdp->choice == Seq_descr_title && !StringHasNoText (sdp->data.ptrvalue)) {
16144           str = StringSave (sdp->data.ptrvalue);
16145         }
16146       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) {
16147         sdp = (SeqDescrPtr) data;
16148         if (sdp != NULL && sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) {
16149           str = GetFirstValNodeStringMatch (gb->keywords, scp);
16150         }
16151       }
16152       break;
16153   }
16154   return str;
16155 }
16156 
16157 
GetFieldValueForObject(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp)16158 NLM_EXTERN CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
16159 {
16160   return GetFieldValueForObjectEx (choice, data, field, scp, NULL);
16161 }
16162 
16163 
GetMultipleFieldValuesForObject(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp,BatchExtraPtr batch_extra)16164 NLM_EXTERN ValNodePtr GetMultipleFieldValuesForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, BatchExtraPtr batch_extra)
16165 {
16166   CharPtr    str = NULL;
16167   ValNodePtr val_list = NULL;
16168   SeqDescPtr sdp;
16169 
16170   if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
16171 
16172   if (field->choice == FieldType_source_qual) {
16173     val_list = GetMultipleSourceQualsFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
16174   } else if (field->choice == FieldType_dblink && choice == OBJ_SEQDESC
16175              && (sdp = (SeqDescPtr) data) != NULL && sdp->choice == Seq_descr_user) {
16176     val_list = GetMultipleDBLinkFieldValuesFromUserObject ((UserObjectPtr) sdp->data.ptrvalue, field->data.intvalue, scp);
16177   } else {
16178     str = GetFieldValueForObjectEx (choice, data, field, scp, batch_extra);
16179     if (str != NULL) {
16180       ValNodeAddPointer (&val_list, 0, str);
16181     }
16182   }
16183   return val_list;
16184 }
16185 
16186 
GBBlockIsCompletelyEmpty(GBBlockPtr gb)16187 NLM_EXTERN Boolean GBBlockIsCompletelyEmpty (GBBlockPtr gb)
16188 {
16189   if (gb != NULL
16190       && gb->extra_accessions == NULL
16191       && gb->keywords == NULL
16192       && gb->source == NULL
16193       && gb->origin == NULL
16194       && gb->date == NULL
16195       && gb->div == NULL
16196       && gb->taxonomy == NULL
16197       && gb->entry_date == NULL) {
16198     return TRUE;
16199   } else {
16200     return FALSE;
16201   }
16202 }
16203 
16204 
RemoveFieldValueForObject(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp)16205 static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
16206 {
16207   Boolean rval = FALSE;
16208   FeatureFieldPtr feature_field;
16209   SeqDescrPtr     sdp;
16210   ObjValNodePtr   ovp;
16211   GBBlockPtr      gb;
16212 
16213   if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
16214 
16215   switch (field->choice) {
16216     case FieldType_source_qual :
16217       rval = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
16218       break;
16219     case FieldType_feature_field :
16220       if (choice == OBJ_SEQFEAT) {
16221         rval = RemoveQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp);
16222       }
16223       break;
16224     case FieldType_cds_gene_prot:
16225       if (choice == 0) {
16226         rval = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
16227       } else if (choice == OBJ_SEQFEAT) {
16228         feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
16229         rval = RemoveQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
16230         feature_field = FeatureFieldFree (feature_field);
16231       }
16232       break;
16233     case FieldType_molinfo_field :
16234       if (choice == OBJ_BIOSEQ) {
16235         rval = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
16236       }
16237       break;
16238     case FieldType_pub :
16239       rval = RemovePubFieldFromObject (choice, data, field->data.intvalue, scp);
16240       break;
16241     case FieldType_rna_field :
16242       if (choice == OBJ_SEQFEAT) {
16243         rval = RemoveRNAQualFromFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp);
16244       }
16245       break;
16246     case FieldType_struc_comment_field:
16247       if (choice == OBJ_SEQDESC && data != NULL) {
16248         sdp = (SeqDescrPtr) data;
16249         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16250           rval = RemoveStructuredCommentFieldFromUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp);
16251           if (rval && IsEmptyStructuredComment (sdp->data.ptrvalue)) {
16252             ovp = (ObjValNodePtr) sdp;
16253             ovp->idx.deleteme = TRUE;
16254           }
16255         }
16256       }
16257       break;
16258     case FieldType_dblink:
16259       if (choice == OBJ_SEQDESC && data != NULL) {
16260         sdp = (SeqDescrPtr) data;
16261         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16262           rval = RemoveDBLinkFieldFromUserObject (sdp->data.ptrvalue, field->data.intvalue, scp);
16263           if (rval && IsEmptyDBLink (sdp->data.ptrvalue)) {
16264             ovp = (ObjValNodePtr) sdp;
16265             ovp->idx.deleteme = TRUE;
16266           }
16267         }
16268       }
16269       break;
16270     case FieldType_misc:
16271       if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
16272         rval = RemoveGenomeProjectIdFromBioseq ((BioseqPtr) data, scp);
16273       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) {
16274         sdp = (SeqDescrPtr) data;
16275         ovp = (ObjValNodePtr) sdp;
16276         if (sdp->choice == Seq_descr_comment) {
16277           ovp->idx.deleteme = TRUE;
16278           rval = TRUE;
16279         }
16280       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) {
16281         sdp = (SeqDescrPtr) data;
16282         ovp = (ObjValNodePtr) sdp;
16283         if (sdp->choice == Seq_descr_title) {
16284           ovp->idx.deleteme = TRUE;
16285           rval = TRUE;
16286         }
16287       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) {
16288         sdp = (SeqDescrPtr) data;
16289         ovp = (ObjValNodePtr) sdp;
16290         if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) {
16291           if (RemoveValNodeStringMatch (&(gb->keywords), scp)) {
16292             rval = TRUE;
16293             if (GBBlockIsCompletelyEmpty(gb)) {
16294               ovp->idx.deleteme = TRUE;
16295             }
16296           }
16297         }
16298       }
16299       break;
16300   }
16301   return rval;
16302 }
16303 
16304 
SetFieldValueForObjectEx(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text,BatchExtraPtr batch_extra)16305 NLM_EXTERN Boolean SetFieldValueForObjectEx (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text, BatchExtraPtr batch_extra)
16306 {
16307   Boolean rval = FALSE;
16308   FeatureFieldPtr feature_field;
16309   SeqDescrPtr     sdp;
16310   ObjValNodePtr   ovp;
16311   GBBlockPtr      gb;
16312   Boolean         was_empty;
16313 
16314   if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
16315 
16316   switch (field->choice) {
16317     case FieldType_source_qual :
16318       rval = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp, value, existing_text);
16319       break;
16320     case FieldType_feature_field :
16321       if (choice == OBJ_SEQFEAT) {
16322         rval = SetQualOnFeatureEx ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, value, existing_text, batch_extra);
16323       }
16324       break;
16325     case FieldType_cds_gene_prot:
16326       if (choice == 0) {
16327         rval = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue, scp, value, existing_text);
16328       } else if (choice == OBJ_SEQFEAT) {
16329         feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
16330         rval = SetQualOnFeatureEx ((SeqFeatPtr) data, feature_field, scp, value, existing_text, batch_extra);
16331         feature_field = FeatureFieldFree (feature_field);
16332       }
16333       break;
16334     case FieldType_molinfo_field:
16335       if (choice == OBJ_BIOSEQ) {
16336         rval = SetSequenceQualOnBioseq ((BioseqPtr) data, field->data.ptrvalue);
16337       }
16338       break;
16339     case FieldType_pub :
16340       rval = SetPubFieldOnObject (choice, data, field->data.intvalue, scp, value, existing_text);
16341       break;
16342     case FieldType_rna_field :
16343       if (choice == OBJ_SEQFEAT) {
16344         rval = SetRNAQualOnFeature ((SeqFeatPtr) data, field->data.ptrvalue, scp, value, existing_text);
16345       }
16346       break;
16347     case FieldType_struc_comment_field:
16348       if (choice == OBJ_SEQDESC && data != NULL) {
16349         sdp = (SeqDescrPtr) data;
16350         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16351           was_empty = IsEmptyStructuredComment (sdp->data.ptrvalue);
16352           rval = SetStructuredCommentFieldOnUserObject (sdp->data.ptrvalue, field->data.ptrvalue, scp, value, existing_text);
16353           if (was_empty && !IsEmptyStructuredComment (sdp->data.ptrvalue)) {
16354             ovp = (ObjValNodePtr) sdp;
16355             ovp->idx.deleteme = FALSE;
16356           }
16357         }
16358       }
16359       break;
16360     case FieldType_dblink:
16361       if (choice == OBJ_SEQDESC && data != NULL) {
16362         sdp = (SeqDescrPtr) data;
16363         if (sdp != NULL && sdp->choice == Seq_descr_user) {
16364           was_empty = IsEmptyDBLink (sdp->data.ptrvalue);
16365           rval = SetDBLinkFieldOnUserObject (sdp->data.ptrvalue, field->data.intvalue, scp, value, existing_text);
16366           if (was_empty && !IsEmptyDBLink (sdp->data.ptrvalue)) {
16367             ovp = (ObjValNodePtr) sdp;
16368             ovp->idx.deleteme = FALSE;
16369           }
16370         }
16371       }
16372       break;
16373     case FieldType_misc:
16374       if (choice == OBJ_BIOSEQ && field->data.intvalue == Misc_field_genome_project_id) {
16375         rval = SetGenomeProjectIdOnBioseq ((BioseqPtr) data, scp, value, existing_text);
16376       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_comment_descriptor) {
16377         sdp = (SeqDescrPtr) data;
16378         if (sdp->choice == Seq_descr_comment) {
16379           rval = SetTextDescriptor (sdp, scp, value, existing_text);
16380         }
16381       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_defline) {
16382         sdp = (SeqDescrPtr) data;
16383         if (sdp->choice == Seq_descr_title) {
16384           rval = SetTextDescriptor (sdp, scp, value, existing_text);
16385           RemoveAutodefObjectsForDesc(sdp);
16386         }
16387       } else if (choice == OBJ_SEQDESC && field->data.intvalue == Misc_field_keyword) {
16388         sdp = (SeqDescrPtr) data;
16389         if (sdp->choice == Seq_descr_genbank && (gb = (GBBlockPtr) sdp->data.ptrvalue) != NULL) {
16390           was_empty = GBBlockIsCompletelyEmpty (gb);
16391           if (SetStringsInValNodeStringList (&(gb->keywords), scp, value, existing_text)) {
16392             rval = TRUE;
16393             if (sdp->extended) {
16394               ovp = (ObjValNodePtr) sdp;
16395               if (GBBlockIsCompletelyEmpty(gb)) {
16396                 ovp->idx.deleteme = TRUE;
16397               } else if (was_empty) {
16398                 ovp->idx.deleteme = FALSE;
16399               }
16400             }
16401           }
16402         }
16403       }
16404       break;
16405   }
16406   return rval;
16407 }
16408 
16409 
SetFieldValueForObject(Uint1 choice,Pointer data,FieldTypePtr field,StringConstraintPtr scp,CharPtr value,Uint2 existing_text)16410 NLM_EXTERN Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
16411 {
16412   return SetFieldValueForObjectEx (choice, data, field, scp, value, existing_text, NULL);
16413 }
16414 
16415 
SortFieldsForObject(Uint1 choice,Pointer data,FieldTypePtr field,Uint2 order)16416 NLM_EXTERN Boolean SortFieldsForObject (Uint1 choice, Pointer data, FieldTypePtr field, Uint2 order)
16417 {
16418   Boolean rval = FALSE;
16419   FeatureFieldPtr feature_field;
16420 
16421   if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return FALSE;
16422 
16423   switch (field->choice) {
16424     case FieldType_source_qual :
16425       break;
16426     case FieldType_feature_field :
16427       if (choice == OBJ_SEQFEAT) {
16428         rval = SortQualOnFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, order);
16429       }
16430       break;
16431     case FieldType_cds_gene_prot:
16432       if (choice == 0) {
16433         rval = SortFieldInCGPSet ((CGPSetPtr) data, field->data.intvalue, order);
16434       } else if (choice == OBJ_SEQFEAT) {
16435         feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
16436         rval = SortQualOnFeature ((SeqFeatPtr) data, feature_field, order);
16437         feature_field = FeatureFieldFree (feature_field);
16438       }
16439       break;
16440     case FieldType_molinfo_field:
16441       break;
16442     case FieldType_pub :
16443       break;
16444     case FieldType_rna_field :
16445       break;
16446     case FieldType_struc_comment_field:
16447       break;
16448     case FieldType_dblink:
16449       break;
16450     case FieldType_misc:
16451       break;
16452   }
16453   return rval;
16454 }
16455 
16456 
GetFieldTypeListFromAECRAction(AECRActionPtr action)16457 NLM_EXTERN ValNodePtr GetFieldTypeListFromAECRAction (AECRActionPtr action)
16458 {
16459   ValNodePtr field_list = NULL;
16460   ApplyActionPtr apply;
16461   EditActionPtr  edit;
16462   ConvertActionPtr convert;
16463   CopyActionPtr    copy;
16464   SwapActionPtr    swap;
16465   RemoveActionPtr  remove;
16466   AECRParseActionPtr parse;
16467   RemoveOutsideActionPtr ro;
16468 
16469   if (action == NULL) {
16470     return NULL;
16471   }
16472 
16473   /* todo - add fields from constraints ? */
16474 
16475   /* get fields from action */
16476   if (action->action != NULL) {
16477     switch (action->action->choice) {
16478       case ActionChoice_apply:
16479         apply = (ApplyActionPtr) action->action->data.ptrvalue;
16480         if (apply != NULL) {
16481           ValNodeLink (&field_list, FieldTypeCopy (apply->field));
16482         }
16483         break;
16484       case ActionChoice_edit:
16485         edit = (EditActionPtr) action->action->data.ptrvalue;
16486         if (edit != NULL) {
16487           ValNodeLink (&field_list, FieldTypeCopy (edit->field));
16488         }
16489         break;
16490       case ActionChoice_remove_outside:
16491         ro = (RemoveOutsideActionPtr) action->action->data.ptrvalue;
16492         if (ro != NULL) {
16493           ValNodeLink (&field_list, FieldTypeCopy (ro->field));
16494         }
16495         break;
16496       case ActionChoice_convert:
16497         convert = (ConvertActionPtr) action->action->data.ptrvalue;
16498         if (convert != NULL) {
16499           ValNodeLink (&field_list, GetFromFieldFromFieldPair (convert->fields));
16500           ValNodeLink (&field_list, GetToFieldFromFieldPair (convert->fields));
16501         }
16502         break;
16503       case ActionChoice_copy:
16504         copy = (CopyActionPtr) action->action->data.ptrvalue;
16505         if (copy != NULL) {
16506           ValNodeLink (&field_list, GetFromFieldFromFieldPair (copy->fields));
16507           ValNodeLink (&field_list, GetToFieldFromFieldPair (copy->fields));
16508         }
16509         break;
16510       case ActionChoice_swap:
16511         swap = (SwapActionPtr) action->action->data.ptrvalue;
16512         if (swap != NULL) {
16513           ValNodeLink (&field_list, GetFromFieldFromFieldPair (swap->fields));
16514           ValNodeLink (&field_list, GetToFieldFromFieldPair (swap->fields));
16515         }
16516         break;
16517       case ActionChoice_remove:
16518         remove = (RemoveActionPtr) action->action->data.ptrvalue;
16519         if (remove != NULL) {
16520           ValNodeLink (&field_list, FieldTypeCopy (remove->field));
16521         }
16522         break;
16523       case ActionChoice_parse:
16524         parse = (AECRParseActionPtr) action->action->data.ptrvalue;
16525         if (parse != NULL) {
16526           ValNodeLink (&field_list, GetFromFieldFromFieldPair (parse->fields));
16527           ValNodeLink (&field_list, GetToFieldFromFieldPair (parse->fields));
16528         }
16529         break;
16530     }
16531   }
16532   return field_list;
16533 }
16534 
16535 
AreAECRActionFieldsEqual(AECRActionPtr action1,AECRActionPtr action2)16536 NLM_EXTERN Boolean AreAECRActionFieldsEqual (AECRActionPtr action1, AECRActionPtr action2)
16537 {
16538   ApplyActionPtr a1, a2;
16539   EditActionPtr  e1, e2;
16540   ConvertActionPtr v1, v2;
16541   CopyActionPtr c1, c2;
16542   SwapActionPtr s1, s2;
16543   RemoveActionPtr r1, r2;
16544   AECRParseActionPtr p1, p2;
16545   RemoveOutsideActionPtr ro1, ro2;
16546   FieldTypePtr       field1, field2;
16547   Boolean            rval = FALSE;
16548 
16549   if (action1 == NULL && action2 == NULL) {
16550     return TRUE;
16551   } else if (action1 == NULL || action2 == NULL) {
16552     return FALSE;
16553   } else if (action1->action == NULL && action2->action == NULL) {
16554     return TRUE;
16555   } else if (action1->action == NULL || action2->action == NULL) {
16556     return FALSE;
16557   } else if (action1->action->choice != action2->action->choice) {
16558     return FALSE;
16559   } else if (action1->action->data.ptrvalue == NULL && action2->action->data.ptrvalue == NULL) {
16560     return TRUE;
16561   } else if (action1->action->data.ptrvalue == NULL || action2->action->data.ptrvalue == NULL) {
16562     return FALSE;
16563   } else {
16564     switch (action1->action->choice) {
16565       case ActionChoice_apply:
16566         a1 = (ApplyActionPtr) action1->action->data.ptrvalue;
16567         a2 = (ApplyActionPtr) action2->action->data.ptrvalue;
16568         rval = DoFieldTypesMatch (a1->field, a2->field);
16569         break;
16570       case ActionChoice_edit:
16571         e1 = (EditActionPtr) action1->action->data.ptrvalue;
16572         e2 = (EditActionPtr) action2->action->data.ptrvalue;
16573         rval = DoFieldTypesMatch (e1->field, e2->field);
16574         break;
16575       case ActionChoice_remove_outside:
16576         ro1 = (RemoveOutsideActionPtr) action1->action->data.ptrvalue;
16577         ro2 = (RemoveOutsideActionPtr) action2->action->data.ptrvalue;
16578         rval = DoFieldTypesMatch (ro1->field, ro2->field);
16579         break;
16580       case ActionChoice_convert:
16581         v1 = (ConvertActionPtr) action1->action->data.ptrvalue;
16582         v2 = (ConvertActionPtr) action2->action->data.ptrvalue;
16583         field1 = GetFromFieldFromFieldPair (v1->fields);
16584         field2 = GetFromFieldFromFieldPair (v2->fields);
16585         rval = DoFieldTypesMatch (field1, field2);
16586         if (rval) {
16587           field1 = FieldTypeFree (field1);
16588           field2 = FieldTypeFree (field2);
16589           field1 = GetToFieldFromFieldPair (v1->fields);
16590           field2 = GetToFieldFromFieldPair (v2->fields);
16591           rval = DoFieldTypesMatch (field1, field2);
16592         }
16593         field1 = FieldTypeFree (field1);
16594         field2 = FieldTypeFree (field2);
16595         if (rval) {
16596           if ((v1->keep_original && !v2->keep_original)
16597             || (!v1->keep_original && v2->keep_original)) {
16598             rval = FALSE;
16599           }
16600         }
16601         break;
16602       case ActionChoice_copy:
16603         c1 = (CopyActionPtr) action1->action->data.ptrvalue;
16604         c2 = (CopyActionPtr) action2->action->data.ptrvalue;
16605         field1 = GetFromFieldFromFieldPair (c1->fields);
16606         field2 = GetFromFieldFromFieldPair (c2->fields);
16607         rval = DoFieldTypesMatch (field1, field2);
16608         if (rval) {
16609           field1 = FieldTypeFree (field1);
16610           field2 = FieldTypeFree (field2);
16611           field1 = GetToFieldFromFieldPair (c1->fields);
16612           field2 = GetToFieldFromFieldPair (c2->fields);
16613           rval = DoFieldTypesMatch (field1, field2);
16614         }
16615         field1 = FieldTypeFree (field1);
16616         field2 = FieldTypeFree (field2);
16617         break;
16618       case ActionChoice_swap:
16619         s1 = (SwapActionPtr) action1->action->data.ptrvalue;
16620         s2 = (SwapActionPtr) action2->action->data.ptrvalue;
16621         field1 = GetFromFieldFromFieldPair (s1->fields);
16622         field2 = GetFromFieldFromFieldPair (s2->fields);
16623         rval = DoFieldTypesMatch (field1, field2);
16624         if (rval) {
16625           field1 = FieldTypeFree (field1);
16626           field2 = FieldTypeFree (field2);
16627           field1 = GetToFieldFromFieldPair (s1->fields);
16628           field2 = GetToFieldFromFieldPair (s2->fields);
16629           rval = DoFieldTypesMatch (field1, field2);
16630         }
16631         field1 = FieldTypeFree (field1);
16632         field2 = FieldTypeFree (field2);
16633         break;
16634       case ActionChoice_remove:
16635         r1 = (RemoveActionPtr) action1->action->data.ptrvalue;
16636         r2 = (RemoveActionPtr) action2->action->data.ptrvalue;
16637         rval = DoFieldTypesMatch (r1->field, r2->field);
16638         break;
16639       case ActionChoice_parse:
16640         p1 = (AECRParseActionPtr) action1->action->data.ptrvalue;
16641         p2 = (AECRParseActionPtr) action2->action->data.ptrvalue;
16642         field1 = GetFromFieldFromFieldPair (p1->fields);
16643         field2 = GetFromFieldFromFieldPair (p2->fields);
16644         rval = DoFieldTypesMatch (field1, field2);
16645         if (rval) {
16646           field1 = FieldTypeFree (field1);
16647           field2 = FieldTypeFree (field2);
16648           field1 = GetToFieldFromFieldPair (p1->fields);
16649           field2 = GetToFieldFromFieldPair (p2->fields);
16650           rval = DoFieldTypesMatch (field1, field2);
16651         }
16652         field1 = FieldTypeFree (field1);
16653         field2 = FieldTypeFree (field2);
16654         break;
16655     }
16656   }
16657   return rval;
16658 }
16659 
16660 
IsNonTextSourceQualPresent(BioSourcePtr biop,Int4 srcqual)16661 static Boolean IsNonTextSourceQualPresent (BioSourcePtr biop, Int4 srcqual)
16662 {
16663   Int4 orgmod_subtype, subsrc_subtype, subfield;
16664   OrgModPtr mod;
16665   SubSourcePtr ssp;
16666   Boolean      rval = FALSE;
16667 
16668   if (biop == NULL) return FALSE;
16669 
16670   orgmod_subtype = GetOrgModQualFromSrcQual (srcqual, &subfield);
16671   if (orgmod_subtype == -1) {
16672     subsrc_subtype = GetSubSrcQualFromSrcQual (srcqual, &subfield);
16673     for (ssp = biop->subtype; ssp != NULL && !rval; ssp = ssp->next) {
16674       if (ssp->subtype == subsrc_subtype) {
16675         rval = TRUE;
16676       }
16677     }
16678   } else {
16679     if (biop->org != NULL && biop->org->orgname != NULL) {
16680       for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
16681         if (mod->subtype == orgmod_subtype) {
16682           rval = TRUE;
16683         }
16684       }
16685     }
16686   }
16687   return rval;
16688 }
16689 
16690 
IsSourceQualPresent(BioSourcePtr biop,SourceQualChoicePtr scp)16691 static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp)
16692 {
16693   Boolean rval = FALSE;
16694   CharPtr   str;
16695 
16696   if (biop == NULL) return FALSE;
16697   if (scp == NULL) return TRUE;
16698 
16699   switch (scp->choice) {
16700     case SourceQualChoice_textqual:
16701       if (IsNonTextSourceQual (scp->data.intvalue)) {
16702         rval = IsNonTextSourceQualPresent (biop, scp->data.intvalue);
16703       } else {
16704         str = GetSourceQualFromBioSource (biop, scp, NULL);
16705         if (!StringHasNoText (str)) {
16706           rval = TRUE;
16707         }
16708         str = MemFree (str);
16709       }
16710       break;
16711     case SourceQualChoice_location:
16712       if (biop->genome != 0) {
16713         rval = TRUE;
16714       }
16715       break;
16716     case SourceQualChoice_origin:
16717       if (biop->origin != 0) {
16718         rval = TRUE;
16719       }
16720       break;
16721   }
16722   return rval;
16723 }
16724 
16725 
16726 typedef struct objecthasstring
16727 {
16728   StringConstraintPtr scp;
16729   Boolean             found;
16730 } ObjectHasStringData, PNTR ObjectHasStringPtr;
16731 
16732 
AsnWriteConstraintCallBack(AsnExpOptStructPtr pAEOS)16733 static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS)
16734 
16735 {
16736   CharPtr            pchSource;
16737   ObjectHasStringPtr ohsp;
16738 
16739   ohsp = (ObjectHasStringPtr) pAEOS->data;
16740   if (ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp)))
16741   {
16742       pchSource = (CharPtr) pAEOS->dvp->ptrvalue;
16743       ohsp->found |= DoesSingleStringMatchConstraint (pchSource, ohsp->scp);
16744   }
16745 }
16746 
16747 
DoesObjectMatchStringConstraint(Uint1 choice,Pointer data,StringConstraintPtr scp)16748 static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp)
16749 
16750 {
16751   ObjMgrPtr         omp;
16752   ObjMgrTypePtr     omtp;
16753   AsnIoPtr          aip;
16754   AsnExpOptPtr      aeop;
16755   ObjectHasStringData ohsd;
16756   SeqFeatPtr          sfp, prot;
16757   SeqMgrFeatContext   fcontext;
16758   CharPtr             search_txt;
16759   CGPSetPtr           c;
16760   ValNodePtr          vnp;
16761   Boolean             all_match = TRUE, any_match = FALSE, rval;
16762   BioseqPtr           protbsp;
16763   ImpFeatPtr          imp;
16764 
16765   if (data == NULL) return FALSE;
16766   if (scp == NULL) return TRUE;
16767 
16768   if (choice == 0) {
16769     /* CDS-Gene-Prot set */
16770     c = (CGPSetPtr) data;
16771     for (vnp = c->gene_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
16772       if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
16773         any_match = TRUE;
16774       } else {
16775         all_match = FALSE;
16776       }
16777     }
16778     for (vnp = c->cds_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
16779       if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
16780         any_match = TRUE;
16781       } else {
16782         all_match = FALSE;
16783       }
16784     }
16785     for (vnp = c->mrna_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
16786       if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
16787         any_match = TRUE;
16788       } else {
16789         all_match = FALSE;
16790       }
16791     }
16792     for (vnp = c->prot_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
16793       if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
16794         any_match = TRUE;
16795       } else {
16796         all_match = FALSE;
16797       }
16798     }
16799     if (scp->not_present) {
16800       rval = all_match;
16801     } else {
16802       rval = any_match;
16803     }
16804   } else {
16805     omp = ObjMgrGet ();
16806     omtp = ObjMgrTypeFind (omp, choice, NULL, NULL);
16807     if (omtp == NULL) return FALSE;
16808     aip = AsnIoNullOpen ();
16809     aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack);
16810     ohsd.found = FALSE;
16811     ohsd.scp = scp;
16812     if (aeop != NULL) {
16813       aeop->user_data = (Pointer) &ohsd;
16814     }
16815 
16816     (omtp->asnwrite) (data, aip, NULL);
16817 
16818     if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT)
16819     {
16820       sfp = (SeqFeatPtr) data;
16821       if (sfp->data.choice == SEQFEAT_CDREGION) {
16822         protbsp = BioseqFindFromSeqLoc (sfp->product);
16823         prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
16824         if (prot != NULL) {
16825           (omtp->asnwrite) (prot, aip, NULL);
16826         }
16827       } else {
16828         if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) {
16829           SeqMgrIndexFeatures (sfp->idx.entityID, NULL);
16830         }
16831         if (sfp->idx.subtype == FEATDEF_tRNA) {
16832           sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext);
16833           ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp);
16834           if (!ohsd.found && sfp != NULL && sfp->idx.subtype == FEATDEF_tRNA)
16835           {
16836             search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char));
16837             if (search_txt != NULL)
16838             {
16839               sprintf (search_txt, "tRNA-%s", fcontext.label);
16840               ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp);
16841               search_txt = MemFree (search_txt);
16842             }
16843           }
16844         } else if (!ohsd.found && sfp != NULL
16845                    && sfp->data.choice == SEQFEAT_IMP
16846                    && (imp = (ImpFeatPtr) sfp->data.value.ptrvalue) != NULL) {
16847           ohsd.found = DoesSingleStringMatchConstraint (imp->key, ohsd.scp);
16848         }
16849       }
16850     }
16851     AsnIoClose (aip);
16852     if (scp->not_present) {
16853       rval = !ohsd.found;
16854     } else {
16855       rval = ohsd.found;
16856     }
16857   }
16858   return rval;
16859 }
16860 
16861 
IsSourceConstraintEmpty(SourceConstraintPtr scp)16862 NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp)
16863 {
16864   if (scp == NULL) return TRUE;
16865 
16866   if (scp->field1 == NULL
16867       && scp->field2 == NULL
16868       && IsStringConstraintEmpty(scp->constraint)) {
16869     return TRUE;
16870   } else {
16871     return FALSE;
16872   }
16873 }
16874 
DoesBiosourceMatchConstraint(BioSourcePtr biop,SourceConstraintPtr scp)16875 NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp)
16876 {
16877   Boolean rval = FALSE;
16878   CharPtr str1, str2;
16879   ValNode vn;
16880 
16881   if (biop == NULL) return FALSE;
16882   if (scp == NULL) return TRUE;
16883 
16884   if (IsStringConstraintEmpty(scp->constraint)) {
16885     /* looking for qual present */
16886     if (scp->field1 != NULL && scp->field2 == NULL) {
16887       rval = IsSourceQualPresent (biop, scp->field1);
16888     } else if (scp->field2 != NULL && scp->field1 == NULL) {
16889       rval = IsSourceQualPresent (biop, scp->field2);
16890     /* looking for quals to match */
16891     } else if (scp->field1 != NULL && scp->field2 != NULL) {
16892       str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL);
16893       str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL);
16894       if (StringCmp (str1, str2) == 0) {
16895         rval = TRUE;
16896       }
16897       str1 = MemFree (str1);
16898       str2 = MemFree (str2);
16899     } else {
16900       /* nothing specified, automatic match */
16901       rval = TRUE;
16902     }
16903   } else {
16904     if (scp->field1 != NULL && scp->field2 == NULL) {
16905       if (AllowSourceQualMulti(scp->field1) && scp->constraint->not_present) {
16906         scp->constraint->not_present = FALSE;
16907         str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
16908         scp->constraint->not_present = TRUE;
16909         if (str1 != NULL) {
16910           rval = FALSE;
16911         } else {
16912           rval = TRUE;
16913         }
16914         str1 = MemFree (str1);
16915       } else {
16916         str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
16917         if (str1 == NULL) {
16918           if (scp->constraint->not_present) {
16919             str1 = GetSourceQualFromBioSource (biop, scp->field1, NULL);
16920             if (str1 == NULL) {
16921               rval = TRUE;
16922             }
16923           }
16924         } else if (!StringHasNoText (str1)) {
16925           rval = TRUE;
16926         }
16927         str1 = MemFree (str1);
16928       }
16929     } else if (scp->field2 != NULL && scp->field1 == NULL) {
16930       str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint);
16931       if (str2 == NULL) {
16932         if (scp->constraint->not_present) {
16933           str2 = GetSourceQualFromBioSource (biop, scp->field2, NULL);
16934           if (str2 == NULL) {
16935             rval = TRUE;
16936           }
16937         }
16938       } else if (!StringHasNoText (str2)) {
16939         rval = TRUE;
16940       }
16941       str2 = MemFree (str2);
16942     } else if (scp->field1 != NULL && scp->field2 != NULL) {
16943       str1 = GetSourceQualFromBioSource (biop, scp->field1, scp->constraint);
16944       str2 = GetSourceQualFromBioSource (biop, scp->field2, scp->constraint);
16945       if (StringCmp (str1, str2) == 0) {
16946         rval = TRUE;
16947       }
16948       str1 = MemFree (str1);
16949       str2 = MemFree (str2);
16950     } else {
16951       /* generic string constraint */
16952       vn.choice = Seq_descr_source;
16953       vn.next = NULL;
16954       vn.extended = 0;
16955       vn.data.ptrvalue = biop;
16956       rval = DoesObjectMatchStringConstraint (OBJ_SEQDESC, &vn, scp->constraint);
16957     }
16958   }
16959   return rval;
16960 }
16961 
16962 
DoesCGPSetMatchPseudoConstraint(CGPSetPtr c,CDSGeneProtPseudoConstraintPtr constraint)16963 static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint)
16964 {
16965   Boolean    any_pseudo = FALSE;
16966   ValNodePtr vnp;
16967   SeqFeatPtr sfp;
16968   Boolean    rval = FALSE;
16969 
16970   if (c == NULL) return FALSE;
16971   if (constraint == NULL) return TRUE;
16972 
16973   switch (constraint->feature) {
16974     case CDSGeneProt_feature_type_constraint_gene :
16975       for (vnp = c->gene_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
16976         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
16977         if (sfp != NULL && sfp->pseudo) {
16978           any_pseudo = TRUE;
16979         }
16980       }
16981       break;
16982     case CDSGeneProt_feature_type_constraint_mRNA :
16983       for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
16984         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
16985         if (sfp != NULL && sfp->pseudo) {
16986           any_pseudo = TRUE;
16987         }
16988       }
16989       break;
16990     case CDSGeneProt_feature_type_constraint_cds :
16991       for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
16992         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
16993         if (sfp != NULL && sfp->pseudo) {
16994           any_pseudo = TRUE;
16995         }
16996       }
16997       break;
16998     case CDSGeneProt_feature_type_constraint_prot :
16999       for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
17000         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
17001         if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_PROT) {
17002           any_pseudo = TRUE;
17003         }
17004       }
17005       break;
17006     case CDSGeneProt_feature_type_constraint_mat_peptide :
17007       for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
17008         sfp = (SeqFeatPtr) vnp->data.ptrvalue;
17009         if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
17010           any_pseudo = TRUE;
17011         }
17012       }
17013       break;
17014   }
17015 
17016   if ((any_pseudo && constraint->is_pseudo)
17017       || (!any_pseudo && !constraint->is_pseudo)) {
17018     rval = TRUE;
17019   }
17020   return rval;
17021 }
17022 
17023 
DoesFeatureMatchCGPPseudoConstraint(SeqFeatPtr sfp,CDSGeneProtPseudoConstraintPtr constraint)17024 static Boolean DoesFeatureMatchCGPPseudoConstraint (SeqFeatPtr sfp, CDSGeneProtPseudoConstraintPtr constraint)
17025 {
17026   Boolean    any_pseudo = FALSE;
17027   ValNodePtr feat_list, vnp;
17028   SeqFeatPtr gene, mrna, cds, prot;
17029   Boolean    rval = FALSE;
17030   SeqMgrFeatContext fcontext;
17031 
17032   if (sfp == NULL) return FALSE;
17033   if (constraint == NULL) return TRUE;
17034 
17035   switch (constraint->feature) {
17036     case CDSGeneProt_feature_type_constraint_gene :
17037       if (sfp->data.choice == SEQFEAT_GENE) {
17038         if (sfp->pseudo) {
17039           any_pseudo = TRUE;
17040         }
17041       } else if (sfp->data.choice == SEQFEAT_PROT) {
17042         cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext);
17043         if (cds != NULL) {
17044           gene = GetGeneForFeature (cds);
17045           if (gene != NULL && gene->pseudo) {
17046             any_pseudo = TRUE;
17047           }
17048         }
17049       } else {
17050         gene = GetGeneForFeature (sfp);
17051         if (gene != NULL && gene->pseudo) {
17052           any_pseudo = TRUE;
17053         }
17054       }
17055       break;
17056     case CDSGeneProt_feature_type_constraint_mRNA :
17057       if (sfp->idx.subtype == FEATDEF_mRNA) {
17058         if (sfp->pseudo) {
17059           any_pseudo = TRUE;
17060         }
17061       } else if (sfp->data.choice == SEQFEAT_PROT) {
17062         cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext);
17063         if (cds != NULL) {
17064           mrna = GetmRNAforCDS (cds);
17065           if (mrna != NULL && mrna->pseudo) {
17066             any_pseudo = TRUE;
17067           }
17068         }
17069       } else {
17070         mrna = GetmRNAforCDS (sfp);
17071         if (mrna != NULL && mrna->pseudo) {
17072           any_pseudo = TRUE;
17073         }
17074       }
17075       break;
17076     case CDSGeneProt_feature_type_constraint_cds :
17077       if (sfp->idx.subtype == FEATDEF_CDS) {
17078         if (sfp->pseudo) {
17079           any_pseudo = TRUE;
17080         }
17081       } else if (sfp->data.choice == SEQFEAT_PROT) {
17082         cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &fcontext);
17083         if (cds != NULL && cds->pseudo) {
17084           any_pseudo = TRUE;
17085         }
17086       } else {
17087         feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS);
17088         for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
17089           cds = vnp->data.ptrvalue;
17090           if (cds != NULL && cds->pseudo) {
17091             any_pseudo = TRUE;
17092           }
17093         }
17094         feat_list = ValNodeFree (feat_list);
17095       }
17096       break;
17097     case CDSGeneProt_feature_type_constraint_prot :
17098       if (sfp->idx.subtype == FEATDEF_PROT) {
17099         if (sfp->pseudo) {
17100           any_pseudo = TRUE;
17101         }
17102       } else if (sfp->data.choice == SEQFEAT_PROT) {
17103         prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->location), NULL, 0, FEATDEF_PROT, &fcontext);
17104         if (prot != NULL && prot->pseudo) {
17105           any_pseudo = TRUE;
17106         }
17107       } else if (sfp->idx.subtype == FEATDEF_CDS) {
17108         prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (sfp->product), NULL, 0, FEATDEF_PROT, &fcontext);
17109         if (prot != NULL && prot->pseudo) {
17110           any_pseudo = TRUE;
17111         }
17112       } else {
17113         feat_list = ListFeaturesInLocation (BioseqFindFromSeqLoc (sfp->location), sfp->location, SEQFEAT_CDREGION, FEATDEF_CDS);
17114         for (vnp = feat_list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
17115           cds = vnp->data.ptrvalue;
17116           if (cds != NULL) {
17117             prot = SeqMgrGetNextFeature (BioseqFindFromSeqLoc (cds->product), NULL, 0, FEATDEF_PROT, &fcontext);
17118             if (prot != NULL && prot->pseudo) {
17119               any_pseudo = TRUE;
17120             }
17121           }
17122         }
17123         feat_list = ValNodeFree (feat_list);
17124       }
17125       break;
17126     case CDSGeneProt_feature_type_constraint_mat_peptide :
17127       if (sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
17128         if (sfp->pseudo) {
17129           any_pseudo = TRUE;
17130         }
17131       }
17132       break;
17133   }
17134 
17135   if ((any_pseudo && constraint->is_pseudo)
17136       || (!any_pseudo && !constraint->is_pseudo)) {
17137     rval = TRUE;
17138   }
17139   return rval;
17140 }
17141 
17142 
IsCDSGeneProtQualConstraintEmpty(CDSGeneProtQualConstraintPtr constraint)17143 NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint)
17144 {
17145   if (constraint == NULL) return TRUE;
17146   if (constraint->field1 == NULL && constraint->field2 == NULL && IsStringConstraintEmpty (constraint->constraint)) {
17147     return TRUE;
17148   } else {
17149     return FALSE;
17150   }
17151 }
17152 
17153 
DoesCGPSetMatchQualConstraint(CGPSetPtr c,CDSGeneProtQualConstraintPtr constraint)17154 static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint)
17155 {
17156   Boolean rval = FALSE;
17157   CharPtr str, str1, str2;
17158 
17159   if (c == NULL) return FALSE;
17160   if (constraint == NULL) return TRUE;
17161 
17162   if (IsStringConstraintEmpty (constraint->constraint)) {
17163     /* looking for qual present */
17164     if (constraint->field1 != NULL && constraint->field2 == NULL) {
17165       str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
17166       if (str != NULL) {
17167         rval = TRUE;
17168         str = MemFree (str);
17169       }
17170     } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
17171       str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
17172       if (str == NULL) {
17173         rval = FALSE;
17174       } else {
17175         str = MemFree (str);
17176       }
17177     /* looking for quals to match */
17178     } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
17179       str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
17180       str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
17181       if (StringCmp (str1, str2) == 0) {
17182         rval = TRUE;
17183       }
17184       str1 = MemFree (str1);
17185       str2 = MemFree (str2);
17186     } else {
17187       /* nothing specified, automatic match */
17188       rval = TRUE;
17189     }
17190   } else {
17191     if (constraint->field1 != NULL && constraint->field2 == NULL) {
17192       str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
17193       if (str1 == NULL) {
17194         if (constraint->constraint->not_present) {
17195           str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
17196           if (str1 == NULL) {
17197             rval = TRUE;
17198           }
17199         }
17200       } else if (!StringHasNoText (str1)) {
17201         rval = TRUE;
17202       }
17203       str1 = MemFree (str1);
17204     } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
17205       str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
17206       if (str2 == NULL) {
17207         if (constraint->constraint->not_present) {
17208           str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
17209           if (str2 == NULL) {
17210             rval = TRUE;
17211           }
17212         }
17213       } else if (!StringHasNoText (str2)) {
17214         rval = TRUE;
17215       }
17216       str2 = MemFree (str2);
17217     } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
17218       str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
17219       str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
17220       if (StringCmp (str1, str2) == 0) {
17221         rval = TRUE;
17222       }
17223       str1 = MemFree (str1);
17224       str2 = MemFree (str2);
17225     } else {
17226       /* generic string constraint */
17227       rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint);
17228     }
17229   }
17230   return rval;
17231 }
17232 
17233 
DoesSequenceHaveFeatureWithQualPresent(BioseqPtr bsp,FeatureFieldPtr feature_field,StringConstraintPtr scp)17234 static Boolean DoesSequenceHaveFeatureWithQualPresent (BioseqPtr bsp, FeatureFieldPtr feature_field, StringConstraintPtr scp)
17235 {
17236   Boolean           rval = FALSE;
17237   SeqFeatPtr        sfp, sfp_p;
17238   SeqMgrFeatContext context1, context2;
17239   Int4              featdef;
17240   Uint1             seqfeattype;
17241   CharPtr           str;
17242   BioseqPtr         prot_bsp;
17243 
17244   if (bsp == NULL) {
17245     return FALSE;
17246   } else if (feature_field == NULL) {
17247     return TRUE;
17248   }
17249   featdef = GetFeatdefFromFeatureType(feature_field->type);
17250   seqfeattype = FindFeatFromFeatDefType (featdef);
17251   if (seqfeattype == SEQFEAT_PROT) {
17252     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1);
17253         sfp != NULL && !rval;
17254         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) {
17255       prot_bsp = BioseqFindFromSeqLoc (sfp->product);
17256       for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2);
17257             sfp_p != NULL && !rval;
17258             sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) {
17259         str = GetQualFromFeature (sfp_p, feature_field, scp);
17260         if (str == NULL && scp != NULL) {
17261           if (scp->not_present) {
17262             str = GetQualFromFeature (sfp_p, feature_field, NULL);
17263             if (str == NULL) {
17264               rval = TRUE;
17265             }
17266           }
17267         } else if (!StringHasNoText (str)) {
17268           rval = TRUE;
17269         }
17270         str = MemFree (str);
17271       }
17272     }
17273   } else {
17274     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1);
17275         sfp != NULL && !rval;
17276         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) {
17277       str = GetQualFromFeature (sfp, feature_field, scp);
17278       if (str == NULL && scp != NULL) {
17279         if (scp->not_present) {
17280           str = GetQualFromFeature (sfp, feature_field, NULL);
17281           if (str == NULL) {
17282             rval = TRUE;
17283           }
17284         }
17285       } else if (!StringHasNoText (str)) {
17286         rval = TRUE;
17287       }
17288       str = MemFree (str);
17289     }
17290   }
17291   return rval;
17292 }
17293 
17294 
17295 static Boolean
DoesSequenceHaveFeatureWithMatchingQuals(BioseqPtr bsp,CDSGeneProtConstraintFieldPtr f1,CDSGeneProtConstraintFieldPtr f2,StringConstraintPtr scp)17296 DoesSequenceHaveFeatureWithMatchingQuals
17297 (BioseqPtr bsp,
17298  CDSGeneProtConstraintFieldPtr f1,
17299  CDSGeneProtConstraintFieldPtr f2,
17300  StringConstraintPtr           scp)
17301 {
17302   Int4              featdef;
17303   Uint1             seqfeattype;
17304   SeqFeatPtr        sfp, sfp_p;
17305   CharPtr           str, str2;
17306   SeqMgrFeatContext context1, context2;
17307   FeatureFieldPtr   feature_field1 = NULL, feature_field2 = NULL;
17308   CGPSetPtr         c;
17309   Boolean           b = FALSE;
17310   Boolean           rval = FALSE;
17311   BioseqPtr         prot_bsp;
17312 
17313   if (bsp == NULL || f1 == NULL || f2 == NULL) {
17314     return FALSE;
17315   }
17316   feature_field1 = FeatureFieldFromCDSGeneProtField(f1->data.intvalue);
17317   feature_field2 = FeatureFieldFromCDSGeneProtField(f2->data.intvalue);
17318 
17319   if (feature_field1 == NULL || feature_field2 == NULL) {
17320     feature_field1 = FeatureFieldFree (feature_field1);
17321     feature_field2 = FeatureFieldFree (feature_field2);
17322     return FALSE;
17323   }
17324 
17325   if (feature_field1->type == feature_field2->type) {
17326     featdef = GetFeatdefFromFeatureType(feature_field1->type);
17327     seqfeattype = FindFeatFromFeatDefType (featdef);
17328     if (seqfeattype == SEQFEAT_PROT) {
17329       for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1);
17330           sfp != NULL && !rval;
17331           sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) {
17332         prot_bsp = BioseqFindFromSeqLoc (sfp->product);
17333         for (sfp_p = SeqMgrGetNextFeature (prot_bsp, NULL, 0, featdef, &context2);
17334             sfp_p != NULL && !rval;
17335             sfp_p = SeqMgrGetNextFeature (prot_bsp, sfp_p, 0, featdef, &context2)) {
17336           str = GetQualFromFeature (sfp_p, feature_field1, scp);
17337           str2 = GetQualFromFeature (sfp_p, feature_field2, scp);
17338           if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) {
17339             rval = TRUE;
17340           }
17341           str = MemFree (str);
17342           str2 = MemFree (str2);
17343         }
17344       }
17345     } else {
17346       for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context1);
17347           sfp != NULL && !rval;
17348           sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &context1)) {
17349         str = GetQualFromFeature (sfp, feature_field1, scp);
17350         str2 = GetQualFromFeature (sfp, feature_field2, scp);
17351         if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) {
17352           rval = TRUE;
17353         }
17354         str = MemFree (str);
17355         str2 = MemFree (str2);
17356       }
17357     }
17358   } else {
17359     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context1);
17360         sfp != NULL && !rval;
17361         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &context1)) {
17362       c = BuildCGPSetFromCodingRegion (sfp, &b);
17363       str = GetFieldValueFromCGPSet (c, f1->data.intvalue, scp);
17364       str2 = GetFieldValueFromCGPSet (c, f2->data.intvalue, scp);
17365       if (str != NULL && str2 != NULL && StringCmp (str, str2) == 0) {
17366         rval = TRUE;
17367       }
17368       str = MemFree (str);
17369       str2 = MemFree (str2);
17370       c = CGPSetFree (c);
17371     }
17372   }
17373   return rval;
17374 }
17375 
17376 
DoesSequenceMatchCGPQualConstraint(BioseqPtr bsp,CDSGeneProtQualConstraintPtr constraint)17377 static Boolean DoesSequenceMatchCGPQualConstraint (BioseqPtr bsp, CDSGeneProtQualConstraintPtr constraint)
17378 {
17379   FeatureFieldPtr feature_field;
17380   Boolean         rval = FALSE;
17381 
17382   if (bsp == NULL) {
17383     return FALSE;
17384   } else if (constraint == NULL) {
17385     return TRUE;
17386   }
17387 
17388   if (IsStringConstraintEmpty (constraint->constraint)) {
17389     /* looking for qual present */
17390     if ((constraint->field1 != NULL && constraint->field2 == NULL)
17391         || (constraint->field2 != NULL && constraint->field1 == NULL)) {
17392       if (constraint->field1 != NULL) {
17393         feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue);
17394       } else {
17395         feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue);
17396       }
17397       if (feature_field != NULL) {
17398         rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, NULL);
17399         feature_field = FeatureFieldFree (feature_field);
17400       }
17401     /* looking for quals to match */
17402     } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
17403       rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, NULL);
17404     } else {
17405       /* nothing specified, automatic match */
17406       rval = TRUE;
17407     }
17408   } else if ((constraint->field1 != NULL && constraint->field2 == NULL)
17409              || (constraint->field1 == NULL && constraint->field2 != NULL)) {
17410     /* one field must match constraint */
17411     if (constraint->field1 != NULL) {
17412       feature_field = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue);
17413     } else {
17414       feature_field = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue);
17415     }
17416     if (feature_field != NULL) {
17417       rval = DoesSequenceHaveFeatureWithQualPresent (bsp, feature_field, constraint->constraint);
17418       feature_field = FeatureFieldFree (feature_field);
17419     }
17420   } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
17421     /* two fields must match and match constraint */
17422     rval = DoesSequenceHaveFeatureWithMatchingQuals (bsp, constraint->field1, constraint->field2, constraint->constraint);
17423   } else {
17424     /* generic string constraint */
17425     rval = DoesObjectMatchStringConstraint (OBJ_BIOSEQ, bsp, constraint->constraint);
17426   }
17427   return rval;
17428 }
17429 
17430 
DoesSequenceInSetMatchCGPQualConstraint(BioseqSetPtr bssp,CDSGeneProtQualConstraintPtr constraint)17431 static Boolean DoesSequenceInSetMatchCGPQualConstraint (BioseqSetPtr bssp, CDSGeneProtQualConstraintPtr constraint)
17432 {
17433   Boolean       rval = FALSE;
17434   SeqEntryPtr   sep;
17435 
17436   if (bssp == NULL) return FALSE;
17437   if (constraint == NULL) return TRUE;
17438 
17439   for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) {
17440     if (IS_Bioseq (sep)) {
17441       rval = DoesSequenceMatchCGPQualConstraint ((BioseqPtr) sep->data.ptrvalue, constraint);
17442     } else if (IS_Bioseq_set (sep)) {
17443       rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint);
17444     }
17445   }
17446   return rval;
17447 }
17448 
17449 
DoesSeqDescMatchCGPQualConstraint(SeqDescrPtr sdp,CDSGeneProtQualConstraintPtr constraint)17450 static Boolean DoesSeqDescMatchCGPQualConstraint (SeqDescrPtr sdp, CDSGeneProtQualConstraintPtr constraint)
17451 {
17452   Boolean rval = FALSE;
17453   BioseqPtr bsp;
17454   ObjValNodePtr ovp;
17455 
17456   if (sdp == NULL) return FALSE;
17457   if (constraint == NULL) return TRUE;
17458 
17459   bsp = GetSequenceForObject (OBJ_SEQDESC, sdp);
17460   if (bsp == NULL) {
17461     if (sdp->extended) {
17462       ovp = (ObjValNodePtr) sdp;
17463       if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) {
17464         rval = DoesSequenceInSetMatchCGPQualConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint);
17465       }
17466     }
17467   } else {
17468     rval = DoesSequenceMatchCGPQualConstraint (bsp, constraint);
17469   }
17470 
17471   return rval;
17472 }
17473 
17474 
/* Clears the idx.deleteme flag on every feature referenced by the list.
 * Nodes with a NULL pointer are skipped.
 */
static void UnmarkFeatureList (ValNodePtr list)
{
  ValNodePtr node;
  SeqFeatPtr feat;

  for (node = list; node != NULL; node = node->next)
  {
    feat = node->data.ptrvalue;
    if (feat == NULL)
    {
      continue;
    }
    feat->idx.deleteme = FALSE;
  }
}
17489 
17490 
/* Appends to the CDS, mRNA and protein lists of c the features that belong
 * to the given gene:
 *   - each coding region whose gene (per GetGeneForFeature) is this gene,
 *     the mRNA reported for that coding region, and the protein and
 *     mat_peptide features found on the coding region's product
 *   - each mRNA whose gene is this gene
 * Scanning stops once a feature's left position passes the right end of the
 * gene location.  Does nothing when c or gene is NULL or when no Bioseq is
 * found for the gene location.
 */
static void FillOutCGPSetForGene (CGPSetPtr c, SeqFeatPtr gene)
{
  SeqFeatPtr        feat, transcript, prot;
  SeqMgrFeatContext nuc_context, prot_context;
  BioseqPtr         nuc_bsp, prot_bsp;
  Int4              loc_start, loc_stop, gene_right;
  ValNodeBlock      cds_block, mrna_block, prot_block;

  if (c == NULL || gene == NULL)
  {
    return;
  }
  nuc_bsp = BioseqFindFromSeqLoc (gene->location);
  if (nuc_bsp == NULL)
  {
    return;
  }

  /* start from whatever the set already holds */
  InitValNodeBlock(&cds_block, c->cds_list);
  InitValNodeBlock(&mrna_block, c->mrna_list);
  InitValNodeBlock(&prot_block, c->prot_list);

  /* rightmost coordinate of the gene, regardless of start/stop order */
  loc_start = SeqLocStart (gene->location);
  loc_stop = SeqLocStop (gene->location);
  gene_right = (loc_start > loc_stop) ? loc_start : loc_stop;

  for (feat = SeqMgrGetNextFeature (nuc_bsp, NULL, SEQFEAT_CDREGION, 0, &nuc_context);
       feat != NULL && nuc_context.left <= gene_right;
       feat = SeqMgrGetNextFeature (nuc_bsp, feat, SEQFEAT_CDREGION, 0, &nuc_context))
  {
    if (gene != GetGeneForFeature (feat))
    {
      continue;
    }

    ValNodeAddPointerToEnd (&cds_block, 0, feat);

    transcript = GetmRNAforCDS (feat);
    if (transcript != NULL)
    {
      ValNodeAddPointerToEnd (&mrna_block, 0, transcript);
    }

    if (feat->product == NULL)
    {
      continue;
    }
    prot_bsp = BioseqFindFromSeqLoc (feat->product);
    if (prot_bsp == NULL)
    {
      continue;
    }

    /* first protein feature on the product */
    prot = SeqMgrGetNextFeature (prot_bsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &prot_context);
    if (prot != NULL)
    {
      ValNodeAddPointerToEnd (&prot_block, 0, prot);
    }

    /* also add in mat_peptides from protein feature */
    for (prot = SeqMgrGetNextFeature (prot_bsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &prot_context);
         prot != NULL;
         prot = SeqMgrGetNextFeature (prot_bsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &prot_context))
    {
      ValNodeAddPointerToEnd (&prot_block, 0, prot);
    }
  }

  /* mRNAs assigned to this gene, whether or not a CDS led to them above */
  for (transcript = SeqMgrGetNextFeature (nuc_bsp, NULL, 0, FEATDEF_mRNA, &nuc_context);
       transcript != NULL && nuc_context.left <= gene_right;
       transcript = SeqMgrGetNextFeature (nuc_bsp, transcript, 0, FEATDEF_mRNA, &nuc_context))
  {
    if (gene == GetGeneForFeature (transcript))
    {
      ValNodeAddPointerToEnd (&mrna_block, 0, transcript);
    }
  }

  /* hand the (possibly new) list heads back to the set */
  c->cds_list = cds_block.head;
  c->mrna_list = mrna_block.head;
  c->prot_list = prot_block.head;
}
17563 
17564 
/* Sorts and de-duplicates the gene list of c, adds the CDS/mRNA/protein
 * features of every gene via FillOutCGPSetForGene, then sorts and
 * de-duplicates the CDS, mRNA and protein lists (in that order).
 * Does nothing when c is NULL.
 */
static void FillOutCGPSetForGeneList (CGPSetPtr c)
{
  ValNodePtr      gene_vnp;
  ValNodePtr PNTR derived[3];
  Int4            i;

  if (c == NULL) {
    return;
  }

  c->gene_list = ValNodeSort (c->gene_list, SortVnpByChoiceAndPtrvalue);
  ValNodeUnique (&c->gene_list, SortVnpByChoiceAndPtrvalue, ValNodeFree);

  gene_vnp = c->gene_list;
  while (gene_vnp != NULL)
  {
    FillOutCGPSetForGene (c, gene_vnp->data.ptrvalue);
    gene_vnp = gene_vnp->next;
  }

  /* the per-gene fill can add the same feature more than once */
  derived[0] = &c->cds_list;
  derived[1] = &c->mrna_list;
  derived[2] = &c->prot_list;
  for (i = 0; i < 3; i++)
  {
    *derived[i] = ValNodeSort (*derived[i], SortVnpByChoiceAndPtrvalue);
    ValNodeUnique (derived[i], SortVnpByChoiceAndPtrvalue, ValNodeFree);
  }
}
17587 
17588 
DoesFeatureMatchCGPQualConstraint(SeqFeatPtr sfp,CDSGeneProtQualConstraintPtr constraint)17589 static Boolean DoesFeatureMatchCGPQualConstraint (SeqFeatPtr sfp, CDSGeneProtQualConstraintPtr constraint)
17590 {
17591   CGPSetPtr c = NULL;
17592   Boolean   b = FALSE;
17593   SeqMgrFeatContext context;
17594   Boolean           rval = FALSE;
17595   FeatureFieldPtr   ff;
17596   SeqFeatPtr        cds;
17597   CharPtr           str1 = NULL, str2 = NULL;
17598 
17599   if (sfp == NULL) {
17600     return FALSE;
17601   } else if (constraint == NULL) {
17602     return TRUE;
17603   }
17604 
17605   if (sfp->data.choice == SEQFEAT_CDREGION) {
17606     c = BuildCGPSetFromCodingRegion (sfp, &b);
17607   } else if (sfp->data.choice == SEQFEAT_PROT) {
17608     cds = SeqMgrGetCDSgivenProduct (BioseqFindFromSeqLoc (sfp->location), &context);
17609     c = BuildCGPSetFromCodingRegion (cds, &b);
17610   } else if (sfp->data.choice == SEQFEAT_GENE) {
17611     c = BuildCGPSetFromGene (sfp);
17612     FillOutCGPSetForGeneList (c);
17613   } else if (sfp->data.choice == SEQFEAT_RNA) {
17614     c = BuildCGPSetFrommRNA (sfp);
17615   }
17616   if (c == NULL) {
17617     return FALSE;
17618   }
17619   UnmarkFeatureList (c->cds_list);
17620   UnmarkFeatureList (c->mrna_list);
17621   UnmarkFeatureList (c->gene_list);
17622 
17623   rval = DoesCGPSetMatchQualConstraint (c, constraint);
17624   if (rval && sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
17625     if (constraint->field1 != NULL) {
17626       if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field1->data.intvalue)) {
17627         ff = FeatureFieldFromCDSGeneProtField (constraint->field1->data.intvalue);
17628         str1 = GetQualFromFeature (sfp, ff, constraint->constraint);
17629         ff = FeatureFieldFree (ff);
17630       } else {
17631         str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
17632       }
17633       if (str1 == NULL) {
17634         rval = FALSE;
17635       }
17636     }
17637     if (constraint->field2 != NULL) {
17638       if (IsCDSGeneProtFieldMatPeptideRelated (constraint->field2->data.intvalue)) {
17639         ff = FeatureFieldFromCDSGeneProtField (constraint->field2->data.intvalue);
17640         str2 = GetQualFromFeature (sfp, ff, constraint->constraint);
17641         ff = FeatureFieldFree (ff);
17642       } else {
17643         str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
17644       }
17645       if (str2 == NULL) {
17646         rval = FALSE;
17647       }
17648     }
17649     if (rval && constraint->field1 != NULL && constraint->field2 != NULL && StringCmp (str1, str2) != 0) {
17650       rval = FALSE;
17651     }
17652     str1 = MemFree (str1);
17653     str2 = MemFree (str2);
17654   }
17655   c = CGPSetFree (c);
17656   return rval;
17657 }
17658 
17659 
IsSequenceConstraintEmpty(SequenceConstraintPtr constraint)17660 NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint)
17661 {
17662   if (constraint == NULL) return TRUE;
17663   if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) return FALSE;
17664   if (constraint->feature != Macro_feature_type_any) return FALSE;
17665   /* note - having a num_type_features not be null isn't enough to make the constraint non-empty */
17666   if (!IsStringConstraintEmpty (constraint->id)) return FALSE;
17667   if (constraint->num_features != NULL) return FALSE;
17668   if (constraint->length != NULL) return FALSE;
17669   if (constraint->strandedness != Feature_strandedness_constraint_any) return FALSE;
17670   return TRUE;
17671 }
17672 
17673 
/* Returns a newly allocated copy of orig from which every "BankIt" token
 * (case-insensitive) has been removed, together with any '/', ':' or ' '
 * characters that directly follow it.  A token is only recognised at the
 * start of the string or right after whitespace, ',' or ';'.
 *
 *   orig - text to copy; not modified
 *
 * Returns NULL when orig is NULL or the copy cannot be allocated.  The
 * caller frees the result.
 */
static CharPtr CopyListWithoutBankIt (CharPtr orig)
{
  CharPtr cpy, src, dst;

  if (orig == NULL)
  {
    return NULL;
  }
  cpy = StringSave (orig);
  if (cpy == NULL)
  {
    return NULL;
  }

  /* the copy is at least as long as the result, so it is compacted in
   * place while reading from the untouched original
   */
  src = orig;
  dst = cpy;
  while (*src != 0)
  {
    /* cast before isspace: passing a negative char value is undefined */
    if ((*src == 'B' || *src == 'b')
        && (src == orig
            || isspace ((unsigned char) *(src - 1))
            || *(src - 1) == ','
            || *(src - 1) == ';')
        && StringNICmp (src, "BankIt", 6) == 0)
    {
      src += 6;
      while (*src == '/' || *src == ':' || *src == ' ')
      {
        src++;
      }
    }
    else
    {
      *dst = *src;
      ++dst;
      ++src;
    }
  }
  *dst = 0;
  return cpy;
}
17708 
17709 
/* Tests whether sip is a general ID in the "BankIt" database whose tag
 * matches the string constraint once "BankIt" prefixes have been stripped
 * from the constraint text (see CopyListWithoutBankIt).
 *
 * Returns FALSE when scp or its match_text is NULL, when sip is NULL or is
 * not a general ID, or when the Dbtag is missing, is not "BankIt", or has
 * no tag.
 *
 * scp->match_text is pointed at a stripped copy for the duration of the
 * checks and restored before returning; up to three attempts are made:
 *   1. the stripped text as is
 *   2. the stripped text with its first '/' or ' ' replaced by '_'
 *   3. the part of the tag string before its first '/', compared with
 *      DoesStringMatchConstraint
 */
static Boolean DoesTextMatchBankItId (SeqIdPtr sip, StringConstraintPtr scp)
{
  Boolean rval = FALSE;
  Int4    offset;
  CharPtr text, tmp, cp, partial_match;
  Char    ch_orig = 0;
  DbtagPtr dbtag;

  if (scp == NULL || scp->match_text == NULL || sip == NULL || sip->choice != SEQID_GENERAL) {
    return FALSE;
  }
  dbtag = (DbtagPtr) sip->data.ptrvalue;
  if (dbtag == NULL || StringCmp (dbtag->db, "BankIt") != 0 || dbtag->tag == NULL) {
    return FALSE;
  }
  text = CopyListWithoutBankIt (scp->match_text);

  /* swap in the stripped text; tmp keeps the caller's pointer for restore */
  tmp = scp->match_text;
  scp->match_text = text;
  rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp);
  if (!rval) {
    /* presumably the offset of the first '/' or ' ' in text (StringCSpn is
     * assumed to behave like strcspn - confirm)
     */
    offset = StringCSpn (text, "/ ");
    if (text[offset] != 0) {
      /* retry with that separator written as '_', then put it back */
      ch_orig = text[offset];
      text[offset] = '_';
      rval = DoesObjectIdMatchStringConstraint (dbtag->tag, scp);
      text[offset] = ch_orig;
    }
  }
  /* last resort, skipped when the separator replaced above was a '/':
   * compare only the part of the tag string that precedes its first '/'
   */
  if (!rval && ch_orig != '/' && dbtag->tag->str != NULL && (cp = StringChr (dbtag->tag->str, '/')) != NULL) {
    partial_match = StringSave (dbtag->tag->str);
    partial_match[cp - dbtag->tag->str] = 0;
    rval = DoesStringMatchConstraint (partial_match, scp);
    partial_match = MemFree (partial_match);
  }
  /* restore the caller's constraint text and drop the stripped copy */
  scp->match_text = tmp;
  text = MemFree (text);

  return rval;
}
17750 
17751 
DoesSeqIDListMeetStringConstraint(SeqIdPtr sip,StringConstraintPtr string_constraint)17752 NLM_EXTERN Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint)
17753 {
17754   CharPtr    id;
17755   CharPtr    cp, cp_dst;
17756   SeqIdPtr   tmp;
17757   Boolean    match, changed;
17758   DbtagPtr   dbtag;
17759   CharPtr    tmp_id;
17760 
17761   if (sip == NULL)
17762   {
17763     return FALSE;
17764   }
17765   if (string_constraint == NULL)
17766   {
17767     return TRUE;
17768   }
17769 
17770   while (sip != NULL)
17771   {
17772     /* temporary disconnect ID from list */
17773     tmp = sip->next;
17774     sip->next = NULL;
17775     id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG);
17776     match = DoesSingleStringMatchConstraint (id, string_constraint);
17777     if (!match)
17778     {
17779       changed = FALSE;
17780       /* remove terminating pipe character */
17781       if (id[StringLen(id) - 1] == '|')
17782       {
17783         id[StringLen(id) - 1] = 0;
17784         changed = TRUE;
17785       }
17786       /* remove leading pipe identifier */
17787       cp = StringChr (id, '|');
17788       if (cp != NULL)
17789       {
17790         changed = TRUE;
17791         cp++;
17792         cp_dst = id;
17793         while (*cp != 0)
17794         {
17795           *cp_dst = *cp;
17796           cp_dst++;
17797           cp++;
17798         }
17799         *cp_dst = 0;
17800       }
17801       if (changed)
17802       {
17803         match = DoesSingleStringMatchConstraint (id, string_constraint);
17804       }
17805 
17806       /* if search text doesn't have ., try ID without version */
17807       if (!match && StringChr (string_constraint->match_text, '.') == NULL)
17808       {
17809         cp = StringChr (id, '.');
17810         if (cp != NULL)
17811         {
17812           *cp = 0;
17813           match = DoesSingleStringMatchConstraint (id, string_constraint);
17814           *cp = '.';
17815         }
17816       }
17817 
17818       /* Bankit? */
17819       if (!match && DoesTextMatchBankItId (sip, string_constraint))
17820       {
17821         match = TRUE;
17822       }
17823 
17824       if (!match && sip->choice == SEQID_GENERAL && sip->data.ptrvalue != NULL) {
17825         dbtag = (DbtagPtr) sip->data.ptrvalue;
17826         if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
17827           if (DoesSingleStringMatchConstraint (dbtag->tag->str, string_constraint)) {
17828             match = TRUE;
17829           } else if ((cp = StringRChr (dbtag->tag->str, '/')) != NULL) {
17830             tmp_id = (CharPtr) MemNew (sizeof (Char) * (cp - dbtag->tag->str + 1));
17831             StringNCpy (tmp_id, dbtag->tag->str, cp - dbtag->tag->str);
17832             tmp_id[cp - dbtag->tag->str] = 0;
17833             if (DoesSingleStringMatchConstraint (tmp_id, string_constraint)) {
17834               match = TRUE;
17835             }
17836             tmp_id = MemFree (tmp_id);
17837           }
17838         }
17839       }
17840     }
17841     id = MemFree (id);
17842     sip->next = tmp;
17843 
17844     if (match)
17845     {
17846       if (string_constraint->not_present)
17847       {
17848         return FALSE;
17849       }
17850       else
17851       {
17852         return TRUE;
17853       }
17854     }
17855     sip = sip->next;
17856   }
17857   if (string_constraint->not_present)
17858   {
17859     return TRUE;
17860   }
17861   else
17862   {
17863     return FALSE;
17864   }
17865 }
17866 
17867 
17868 typedef struct rnatypebiomol {
17869   Int4 rnatype;
17870   Uint1 biomol;
17871   CharPtr rnamolname;
17872 } RnaTypeBiomolData, PNTR RnaTypeBiomolPtr;
17873 
17874 static RnaTypeBiomolData rna_type_biomol[] = {
17875 { Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, "Genomic RNA" } ,
17876 { Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } ,
17877 { Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } ,
17878 { Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } ,
17879 { Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } ,
17880 { Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } ,
17881 { Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } ,
17882 { Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } ,
17883 { Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding  RNA" } ,
17884 { Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ;
17885 
17886 #define NUM_rna_type_biomol sizeof (rna_type_biomol) / sizeof (RnaTypeBiomolData)
17887 
17888 
GetBiomolForRnaType(Int4 rnatype)17889 NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype)
17890 {
17891   Int4 i;
17892 
17893   for (i = 0; i <  NUM_rna_type_biomol; i++) {
17894     if (rna_type_biomol[i].rnatype == rnatype) {
17895       return rna_type_biomol[i].biomol;
17896     }
17897   }
17898   return 0;
17899 }
17900 
17901 
GetBiomolNameForRnaType(Int4 rnatype)17902 NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype)
17903 {
17904   Int4 i;
17905 
17906   for (i = 0; i <  NUM_rna_type_biomol; i++) {
17907     if (rna_type_biomol[i].rnatype == rnatype) {
17908       return rna_type_biomol[i].rnamolname;
17909     }
17910   }
17911   return "invalid RNA type";
17912 }
17913 
AddAllRNASubtypesToChoiceList(ValNodePtr PNTR field_list)17914 NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list)
17915 {
17916   Int4 i;
17917 
17918   if (field_list == NULL) return;
17919 
17920   ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA"));
17921   for (i = 0; i < NUM_rna_type_biomol; i++) {
17922     ValNodeAddPointer (field_list, rna_type_biomol[i].rnatype, StringSave (rna_type_biomol[i].rnamolname));
17923   }
17924 }
17925 
17926 
DoesValueMatchQuantityConstraint(Int4 val,ValNodePtr quantity)17927 static Boolean DoesValueMatchQuantityConstraint (Int4 val, ValNodePtr quantity)
17928 {
17929   Boolean rval = TRUE;
17930 
17931   if (quantity == NULL) {
17932     rval = TRUE;
17933   } else if (quantity->choice == QuantityConstraint_equals
17934              && val != quantity->data.intvalue) {
17935     return FALSE;
17936   } else if (quantity->choice == QuantityConstraint_greater_than
17937              && val <= quantity->data.intvalue) {
17938     return FALSE;
17939   } else if (quantity->choice == QuantityConstraint_less_than
17940              && val >= quantity->data.intvalue) {
17941     return FALSE;
17942   }
17943   return rval;
17944 }
17945 
17946 
DoesSequenceMatchStrandednessConstraint(BioseqPtr bsp,Uint2 strandedness)17947 static Boolean DoesSequenceMatchStrandednessConstraint (BioseqPtr bsp, Uint2 strandedness)
17948 {
17949   SeqMgrFeatContext context;
17950   SeqFeatPtr sfp;
17951   Int4 num_minus = 0;
17952   Int4 num_plus = 0;
17953   Boolean rval = FALSE;
17954 
17955   if (bsp == NULL) {
17956     return FALSE;
17957   } else if (strandedness == Feature_strandedness_constraint_any) {
17958     return TRUE;
17959   }
17960 
17961   sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
17962   while (sfp != NULL) {
17963     if (context.strand == Seq_strand_minus) {
17964       num_minus++;
17965       if (strandedness == Feature_strandedness_constraint_plus_only
17966           || strandedness == Feature_strandedness_constraint_no_minus) {
17967         return FALSE;
17968       } else if (strandedness == Feature_strandedness_constraint_at_least_one_minus) {
17969         return TRUE;
17970       }
17971     } else {
17972       num_plus++;
17973       if (strandedness == Feature_strandedness_constraint_minus_only
17974           || strandedness == Feature_strandedness_constraint_no_plus) {
17975         return FALSE;
17976       } else if (strandedness == Feature_strandedness_constraint_at_least_one_plus) {
17977         return TRUE;
17978       }
17979     }
17980     sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context);
17981   }
17982 
17983   switch (strandedness) {
17984     case Feature_strandedness_constraint_minus_only:
17985       if (num_minus > 0 && num_plus == 0) {
17986         rval = TRUE;
17987       }
17988       break;
17989     case Feature_strandedness_constraint_plus_only:
17990       if (num_plus > 0 && num_minus == 0) {
17991         rval = TRUE;
17992       }
17993       break;
17994     case Feature_strandedness_constraint_at_least_one_minus:
17995       if (num_minus > 0) {
17996         rval = TRUE;
17997       }
17998       break;
17999     case Feature_strandedness_constraint_at_least_one_plus:
18000       if (num_plus > 0) {
18001         rval = TRUE;
18002       }
18003       break;
18004     case Feature_strandedness_constraint_no_minus:
18005       if (num_minus == 0) {
18006         rval = TRUE;
18007       }
18008       break;
18009     case Feature_strandedness_constraint_no_plus:
18010       if (num_plus == 0) {
18011         rval = TRUE;
18012       }
18013       break;
18014   }
18015   return rval;
18016 }
18017 
18018 
DoesFeatureCountMatchQuantityConstraint(BioseqPtr bsp,Uint2 featdef,ValNodePtr quantity)18019 static Boolean DoesFeatureCountMatchQuantityConstraint (BioseqPtr bsp, Uint2 featdef, ValNodePtr quantity)
18020 {
18021   Int4              num_features = 0;
18022   SeqFeatPtr        sfp;
18023   SeqMgrFeatContext fcontext;
18024 
18025   for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, (Uint1)featdef, &fcontext);
18026        sfp != NULL;
18027        sfp = SeqMgrGetNextFeature (bsp, sfp, 0, (Uint1)featdef, &fcontext))
18028   {
18029     num_features++;
18030     /* note - break out of loop or return as soon as we know constraint
18031      * succeeds or passes - no need to iterate through all features
18032      */
18033     if (quantity == NULL)
18034     {
18035       return TRUE;
18036     }
18037     else if (quantity->choice == QuantityConstraint_equals
18038           && num_features > quantity->data.intvalue)
18039     {
18040       return FALSE;
18041     }
18042     else if (quantity->choice == QuantityConstraint_greater_than
18043                  && num_features > quantity->data.intvalue)
18044     {
18045       break;
18046     }
18047     else if (quantity->choice == QuantityConstraint_less_than
18048                  && num_features >= quantity->data.intvalue)
18049     {
18050         return FALSE;
18051     }
18052   }
18053   if (quantity == NULL) {
18054     return FALSE;
18055   } else if (!DoesValueMatchQuantityConstraint(num_features, quantity)) {
18056     return FALSE;
18057   } else {
18058     return TRUE;
18059   }
18060 }
18061 
18062 
DoesSequenceMatchSequenceConstraint(BioseqPtr bsp,SequenceConstraintPtr constraint)18063 NLM_EXTERN Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint)
18064 {
18065   SeqDescrPtr sdp;
18066   SeqMgrDescContext dcontext;
18067   MolInfoPtr mip;
18068 
18069   if (bsp == NULL) return FALSE;
18070   if (IsSequenceConstraintEmpty (constraint)) return TRUE;
18071 
18072   if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) {
18073     switch (constraint->seqtype->choice) {
18074       case SequenceConstraintMolTypeConstraint_nucleotide :
18075         if (ISA_aa (bsp->mol)) {
18076           return FALSE;
18077         }
18078         break;
18079       case SequenceConstraintMolTypeConstraint_dna :
18080         if (bsp->mol != Seq_mol_dna) {
18081           return FALSE;
18082         }
18083         break;
18084       case SequenceConstraintMolTypeConstraint_rna :
18085         if (bsp->mol != Seq_mol_rna) {
18086           return FALSE;
18087         }
18088         if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) {
18089           sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
18090           if (sdp == NULL || sdp->data.ptrvalue == NULL || sdp->choice != Seq_descr_molinfo) {
18091             return FALSE;
18092           }
18093           mip = (MolInfoPtr) sdp->data.ptrvalue;
18094           if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != mip->biomol) {
18095             return FALSE;
18096           }
18097         }
18098         break;
18099       case SequenceConstraintMolTypeConstraint_protein :
18100         if (!ISA_aa (bsp->mol)) {
18101           return FALSE;
18102         }
18103         break;
18104     }
18105   }
18106 
18107   if (constraint->feature != Macro_feature_type_any) {
18108     if (!DoesFeatureCountMatchQuantityConstraint (bsp, GetFeatdefFromFeatureType (constraint->feature), constraint->num_type_features)) {
18109       return FALSE;
18110     }
18111   }
18112 
18113   if (!IsStringConstraintEmpty (constraint->id) && !DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id)) {
18114     return FALSE;
18115   }
18116 
18117   if (constraint->num_features != NULL) {
18118     if (!DoesFeatureCountMatchQuantityConstraint (bsp, 0, constraint->num_features)) {
18119       return FALSE;
18120     }
18121   }
18122 
18123   if (!DoesValueMatchQuantityConstraint(bsp->length, constraint->length)) {
18124     return FALSE;
18125   }
18126 
18127   if (!DoesSequenceMatchStrandednessConstraint(bsp, constraint->strandedness)) {
18128     return FALSE;
18129   }
18130 
18131   return TRUE;
18132 }
18133 
DoesSequenceInSetMatchSequenceConstraint(BioseqSetPtr bssp,SequenceConstraintPtr constraint)18134 static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint)
18135 {
18136   Boolean       rval = FALSE;
18137   SeqEntryPtr   sep;
18138 
18139   if (bssp == NULL) return FALSE;
18140   if (IsSequenceConstraintEmpty (constraint)) return TRUE;
18141 
18142   for (sep = bssp->seq_set; sep != NULL && !rval; sep = sep->next) {
18143     if (IS_Bioseq (sep)) {
18144       rval = DoesSequenceMatchSequenceConstraint ((BioseqPtr) sep->data.ptrvalue, constraint);
18145     } else if (IS_Bioseq_set (sep)) {
18146       rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) sep->data.ptrvalue, constraint);
18147     }
18148   }
18149   return rval;
18150 }
18151 
18152 
DoesObjectMatchSequenceConstraint(Uint1 choice,Pointer data,SequenceConstraintPtr constraint)18153 static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint)
18154 {
18155   BioseqPtr bsp;
18156   SeqDescrPtr sdp;
18157   ObjValNodePtr ovp;
18158   Boolean       rval = FALSE;
18159 
18160   if (data == NULL) return FALSE;
18161   if (IsSequenceConstraintEmpty (constraint)) return TRUE;
18162 
18163   bsp = GetSequenceForObject (choice, data);
18164   if (bsp == NULL) {
18165     if (choice == OBJ_SEQDESC) {
18166       sdp = (SeqDescrPtr) data;
18167       if (sdp->extended) {
18168         ovp = (ObjValNodePtr) sdp;
18169         if (ovp->idx.parenttype == OBJ_BIOSEQSET && ovp->idx.parentptr != NULL) {
18170           rval = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint);
18171         }
18172       }
18173     }
18174   } else {
18175     rval = DoesSequenceMatchSequenceConstraint (bsp, constraint);
18176   }
18177   return rval;
18178 }
18179 
18180 
18181 /* Pub fields */
18182 typedef struct pubfieldlabel {
18183   Int4 pub_field;
18184   CharPtr name;
18185 } PubFieldLabelData, PNTR PubFieldLabelPtr;
18186 
18187 
18188 static PubFieldLabelData pubfield_labels[] = {
18189   { Publication_field_cit, "citation" } ,
18190   { Publication_field_authors, "authors" } ,
18191   { Publication_field_journal, "journal" } ,
18192   { Publication_field_volume, "volume" } ,
18193   { Publication_field_issue, "issue" } ,
18194   { Publication_field_pages, "pages" } ,
18195   { Publication_field_date, "date" } ,
18196   { Publication_field_serial_number, "serial number" } ,
18197   { Publication_field_title, "title" } ,
18198   { Publication_field_affiliation, "affiliation" } ,
18199   { Publication_field_affil_div, "department" } ,
18200   { Publication_field_affil_city, "city" } ,
18201   { Publication_field_affil_sub, "state" } ,
18202   { Publication_field_affil_country, "country" } ,
18203   { Publication_field_affil_street, "street" } ,
18204   { Publication_field_affil_email, "email" } ,
18205   { Publication_field_affil_fax, "fax" } ,
18206   { Publication_field_affil_phone, "phone" } ,
18207   { Publication_field_affil_zipcode, "postal code" } ,
18208   { Publication_field_pmid, "PMID"} ,
18209   { Publication_field_pub_class, "class" }
18210 };
18211 
18212 #define NUM_pubfield_labels sizeof (pubfield_labels) / sizeof (PubFieldLabelData)
18213 
18214 
GetPubFieldLabel(Int4 pub_field)18215 NLM_EXTERN CharPtr GetPubFieldLabel (Int4 pub_field)
18216 {
18217   CharPtr rval = NULL;
18218   Int4 i;
18219 
18220   for (i = 0; i < NUM_pubfield_labels; i++) {
18221     if (pubfield_labels[i].pub_field == pub_field) {
18222       rval = pubfield_labels[i].name;
18223       break;
18224     }
18225   }
18226   return rval;
18227 }
18228 
18229 
GetPubFieldFromLabel(CharPtr label)18230 NLM_EXTERN Int4 GetPubFieldFromLabel(CharPtr label)
18231 {
18232   Int4 rval = -1;
18233   Int4 i;
18234 
18235   if (StringNICmp (label, "publication", 11) == 0) {
18236     label = label + 11;
18237     while (*label == '-' || *label == ' ') {
18238       label++;
18239     }
18240   }
18241 
18242   for (i = 0; i < NUM_pubfield_labels; i++) {
18243     if (StringsAreEquivalent(pubfield_labels[i].name, label)) {
18244       rval = pubfield_labels[i].pub_field;
18245       break;
18246     }
18247   }
18248   return rval;
18249 }
18250 
18251 
GetPubFieldList(void)18252 NLM_EXTERN ValNodePtr GetPubFieldList (void)
18253 {
18254   ValNodePtr         val_list = NULL;
18255   Int4 i;
18256 
18257   for (i = 0; i < NUM_pubfield_labels; i++) {
18258     ValNodeAddPointer (&val_list, pubfield_labels[i].pub_field, StringSave (pubfield_labels[i].name));
18259   }
18260 
18261   return val_list;
18262 }
18263 
18264 
MakePubFieldTypeList(void)18265 static ValNodePtr MakePubFieldTypeList (void)
18266 {
18267   ValNodePtr field_list = NULL;
18268   Int4 i;
18269 
18270   for (i = 0; i < NUM_pubfield_labels; i++) {
18271     ValNodeAddInt (&field_list, FieldType_pub, pubfield_labels[i].pub_field);
18272   }
18273 
18274   return field_list;
18275 }
18276 
18277 
18278 typedef struct pub_class_qual {
18279   Uint1 pub_choice;
18280   Int4 status;
18281   Uint1 art_from;
18282   CharPtr name;
18283 } PubClassQualData, PNTR PubClassQualPtr;
18284 
18285 
18286 static PubClassQualData pub_class_quals[] = {
18287   { PUB_Gen, Pub_type_unpublished, 0, "unpublished" } ,
18288   { PUB_Sub, Pub_type_in_press, 0, "in-press submission" } ,
18289   { PUB_Sub, Pub_type_published, 0, "submission" } ,
18290   { PUB_Article, Pub_type_in_press, 1, "in-press journal" } ,
18291   { PUB_Article, Pub_type_published, 1, "journal" } ,
18292   { PUB_Article, Pub_type_in_press, 2, "in-press book chapter" } ,
18293   { PUB_Article, Pub_type_published, 2, "book chapter" } ,
18294   { PUB_Article, Pub_type_in_press, 3, "in-press proceedings chapter" } ,
18295   { PUB_Article, Pub_type_published, 3, "proceedings chapter" } ,
18296   { PUB_Book, Pub_type_in_press, 0, "in-press book" } ,
18297   { PUB_Book, Pub_type_published, 0, "book" } ,
18298   { PUB_Man, Pub_type_in_press, 0, "in-press thesis" } ,
18299   { PUB_Man, Pub_type_published, 0, "thesis" } ,
18300   { PUB_Proc, Pub_type_in_press, 0, "in-press proceedings" } ,
18301   { PUB_Proc, Pub_type_published, 0, "proceedings" } ,
18302   { PUB_Patent, Pub_type_any, 0, "patent" }
18303 };
18304 
18305 #define NUM_pub_class_quals sizeof (pub_class_quals) / sizeof (PubClassQualData)
18306 
18307 
18308 
GetPubClassList()18309 NLM_EXTERN ValNodePtr GetPubClassList ()
18310 {
18311   ValNodePtr list = NULL;
18312   Int4 i;
18313 
18314   for (i = 0; i < NUM_pub_class_quals; i++) {
18315     ValNodeAddPointer (&list, Publication_field_pub_class, StringSave (pub_class_quals[i].name));
18316   }
18317 
18318   return list;
18319 }
18320 
18321 
GetPubclassQualFromPub(PubPtr the_pub)18322 static PubClassQualPtr GetPubclassQualFromPub (PubPtr the_pub)
18323 {
18324   CharPtr str = NULL;
18325   CitArtPtr art;
18326   Int4 ml_class;
18327   Int4 art_from = 0;
18328   Int4 i;
18329 
18330   if (the_pub == NULL) {
18331     return NULL;
18332   }
18333 
18334   ml_class = GetPubMLStatus(the_pub);
18335   if (the_pub->choice == PUB_Article && (art = (CitArtPtr) the_pub->data.ptrvalue) != NULL) {
18336     art_from = art->from;
18337   }
18338 
18339   for (i = 0; i < NUM_pub_class_quals; i++) {
18340     if (the_pub->choice == pub_class_quals[i].pub_choice
18341         && (ml_class == pub_class_quals[i].status || ml_class == 0 || pub_class_quals[i].status == 0)
18342         && (art_from == 0 || pub_class_quals[i].art_from == 0 || art_from == pub_class_quals[i].art_from)) {
18343       return pub_class_quals + i;
18344     }
18345   }
18346 
18347   return NULL;
18348 }
18349 
18350 
GetPubclassFromPub(PubPtr the_pub)18351 NLM_EXTERN CharPtr GetPubclassFromPub (PubPtr the_pub)
18352 {
18353   PubClassQualPtr pq = GetPubclassQualFromPub (the_pub);
18354   if (pq == NULL) {
18355     return NULL;
18356   } else {
18357     return StringSave(pq->name);
18358   }
18359 }
18360 
18361 
GetPubclassFromString(CharPtr str)18362 static PubClassQualPtr GetPubclassFromString(CharPtr str)
18363 {
18364   Int4 i;
18365   PubClassQualPtr pq = NULL;
18366 
18367   for (i = 0; i < NUM_pub_class_quals; i++) {
18368     if (StringsAreEquivalent (pub_class_quals[i].name, str)) {
18369       pq = pub_class_quals + i;
18370       break;
18371     }
18372   }
18373   return pq;
18374 }
18375 
18376 
FreePubDataForConversion(PubPtr the_pub)18377 static Boolean FreePubDataForConversion (PubPtr the_pub)
18378 {
18379   Boolean rval = FALSE;
18380 
18381   if (the_pub == NULL) {
18382     return FALSE;
18383   }
18384 
18385   switch (the_pub->choice) {
18386       case PUB_Gen:
18387         the_pub->data.ptrvalue = CitGenFree (the_pub->data.ptrvalue);
18388         rval = TRUE;
18389         break;
18390       case PUB_Sub:
18391         the_pub->data.ptrvalue = CitSubFree (the_pub->data.ptrvalue);
18392         rval = TRUE;
18393         break;
18394       case PUB_Article:
18395         the_pub->data.ptrvalue = CitArtFree (the_pub->data.ptrvalue);
18396         rval = TRUE;
18397         break;
18398       case PUB_Journal:
18399         the_pub->data.ptrvalue = CitJourFree (the_pub->data.ptrvalue);
18400         rval = TRUE;
18401         break;
18402       case PUB_Book:
18403       case PUB_Man:
18404       case PUB_Proc:
18405         the_pub->data.ptrvalue = CitBookFree (the_pub->data.ptrvalue);
18406         rval = TRUE;
18407         break;
18408       case PUB_Patent:
18409         the_pub->data.ptrvalue = CitPatFree (the_pub->data.ptrvalue);
18410         rval = TRUE;
18411         break;
18412   }
18413   return rval;
18414 }
18415 
18416 
/* SetArtFrom
 * Sets the "from" value of an article pub and attaches a freshly created
 * source object with an empty Imprint: a Cit-jour for from == 1, a Cit-book
 * for from == 2 or 3.  Other values only set the "from" field.
 *
 * Does nothing unless the_pub is a PUB_Article.  A missing Cit-art is
 * created first.
 * NOTE(review): an existing fromptr is overwritten without being freed -
 * confirm callers only pass a newly created Cit-art.
 */
static void SetArtFrom(PubPtr the_pub, Uint1 art_from)
{
  CitArtPtr  article;
  CitJourPtr journal;
  CitBookPtr book;

  if (the_pub == NULL || the_pub->choice != PUB_Article) {
    return;
  }

  article = (CitArtPtr) the_pub->data.ptrvalue;
  if (article == NULL) {
    article = CitArtNew();
    the_pub->data.ptrvalue = article;
  }

  article->from = art_from;
  if (article->from == 1) {
    journal = CitJourNew();
    journal->imp = ImprintNew();
    article->fromptr = journal;
  } else if (article->from == 2 || article->from == 3) {
    book = CitBookNew();
    book->imp = ImprintNew();
    article->fromptr = book;
  }
}
18447 
18448 
NewPubDataForConversion(PubPtr the_pub,Uint1 art_from)18449 static Boolean NewPubDataForConversion (PubPtr the_pub, Uint1 art_from)
18450 {
18451   CitBookPtr cbp;
18452   Boolean rval = FALSE;
18453 
18454   if (the_pub == NULL) {
18455     return FALSE;
18456   }
18457 
18458   switch (the_pub->choice) {
18459       case PUB_Gen:
18460         the_pub->data.ptrvalue = CitGenNew();
18461         rval = TRUE;
18462         break;
18463       case PUB_Sub:
18464         the_pub->data.ptrvalue = CitSubNew();
18465         rval = TRUE;
18466         break;
18467       case PUB_Article:
18468         the_pub->data.ptrvalue = CitArtNew();
18469         SetArtFrom(the_pub, art_from);
18470         rval = TRUE;
18471         break;
18472       case PUB_Journal:
18473         the_pub->data.ptrvalue = CitJourNew();
18474         rval = TRUE;
18475         break;
18476       case PUB_Book:
18477       case PUB_Man:
18478       case PUB_Proc:
18479         cbp = CitBookNew();
18480         cbp->imp = ImprintNew();
18481         cbp->imp->date = DateNew();
18482         the_pub->data.ptrvalue = cbp;
18483         rval = TRUE;
18484         break;
18485       case PUB_Patent:
18486         the_pub->data.ptrvalue = CitPatNew();
18487         rval = TRUE;
18488         break;
18489   }
18490   return rval;
18491 }
18492 
18493 
GetPubImprint(PubPtr the_pub)18494 static ImprintPtr GetPubImprint (PubPtr the_pub)
18495 {
18496   CitArtPtr  cap;
18497   CitBookPtr cbp;
18498   CitJourPtr cjp;
18499   ImprintPtr imp = NULL;
18500 
18501   if (the_pub == NULL || the_pub->data.ptrvalue == NULL)
18502   {
18503     return NULL;
18504   }
18505 
18506   switch (the_pub->choice)
18507   {
18508     case PUB_Article :
18509       cap = (CitArtPtr) the_pub->data.ptrvalue;
18510       if (cap->from == 1)
18511       {
18512         cjp = (CitJourPtr) cap->fromptr;
18513         if (cjp != NULL)
18514         {
18515           imp = cjp->imp;
18516         }
18517       }
18518       else if (cap->from == 2 || cap->from == 3)
18519       {
18520       cbp = (CitBookPtr) cap->fromptr;
18521         if (cbp != NULL) {
18522         imp = cbp->imp;
18523         }
18524       }
18525       break;
18526     case PUB_Journal :
18527       cjp = (CitJourPtr) the_pub->data.ptrvalue;
18528       imp = cjp->imp;
18529     case PUB_Book :
18530     case PUB_Man :
18531       cbp = (CitBookPtr) the_pub->data.ptrvalue;
18532       imp = cbp->imp;
18533       break;
18534     default :
18535       break;
18536 
18537   }
18538   return imp;
18539 }
18540 
18541 
SetPubStatusOnPub(PubPtr the_pub,Int4 status)18542 static Boolean SetPubStatusOnPub (PubPtr the_pub, Int4 status)
18543 {
18544   ImprintPtr imp;
18545   CitGenPtr  cgp;
18546   Boolean rval = FALSE;
18547 
18548   imp = GetPubImprint(the_pub);
18549   if (imp != NULL) {
18550     switch (status) {
18551       case Pub_type_unpublished:
18552         imp->prepub = 255;
18553         rval = TRUE;
18554         break;
18555       case Pub_type_published:
18556         imp->prepub = 0;
18557         rval = TRUE;
18558         break;
18559       case Pub_type_in_press:
18560         imp->prepub = 2;
18561         rval = TRUE;
18562         break;
18563       case Pub_type_submitter_block:
18564         imp->prepub = 1;
18565         rval = TRUE;
18566         break;
18567     }
18568   } else if (the_pub->choice == PUB_Gen) {
18569     if ((cgp = (CitGenPtr) the_pub->data.ptrvalue) == NULL) {
18570       cgp = CitGenNew();
18571       the_pub->data.ptrvalue = cgp;
18572     }
18573     if (status == Pub_type_unpublished) {
18574       cgp->cit = MemFree (cgp->cit);
18575       cgp->cit = StringSave("unpublished");
18576     } else {
18577       if (StringICmp (cgp->cit, "unpublished") == 0) {
18578         cgp->cit = MemFree (cgp->cit);
18579       }
18580     }
18581   } else {
18582 
18583   }
18584   return rval;
18585 }
18586 
18587 
/* CopyRelevantPubDetails
 * Copies every publication field listed in pubfield_labels from orig_pub to
 * new_pub, except the publication class and the author list.  Fields whose
 * value in orig_pub is missing or blank are skipped; values that are copied
 * replace whatever new_pub already holds for that field.
 *
 * Does nothing when either pub is NULL.
 */
static void CopyRelevantPubDetails (PubPtr orig_pub, PubPtr new_pub)
{
  Int4 i;
  CharPtr val;

  if (orig_pub == NULL || new_pub == NULL) {
    return;
  }

  for (i = 0; i < NUM_pubfield_labels; i++) {
    if (pubfield_labels[i].pub_field != Publication_field_pub_class /* field we are copying now */
        && pubfield_labels[i].pub_field != Publication_field_authors /* already copying this elsewhere */) {
      val = GetPubFieldFromPub(orig_pub, pubfield_labels[i].pub_field, NULL);
      if (!StringHasNoText (val)) {
        SetPubFieldOnPub(new_pub, pubfield_labels[i].pub_field, NULL, val, ExistingTextOption_replace_old);
      }
      /* The value from GetPubFieldFromPub is released with MemFree by the
       * other callers in this file; do the same here so it is not leaked.
       * NOTE(review): assumes SetPubFieldOnPub stores its own copy of val -
       * confirm.
       */
      val = MemFree (val);
    }
  }
}
18607 
18608 
SetPubclassOnPub(PubPtr the_pub,CharPtr pub_class)18609 NLM_EXTERN Boolean SetPubclassOnPub (PubPtr the_pub, CharPtr pub_class)
18610 {
18611   PubClassQualPtr orig_pq = NULL, new_pq = NULL;
18612   AuthListPtr PNTR palp;
18613   AuthListPtr PNTR new_palp;
18614   Boolean rval = FALSE;
18615   ValNode new_pub;
18616 
18617   if (the_pub == NULL) {
18618     return FALSE;
18619   }
18620 
18621   new_pq = GetPubclassFromString(pub_class);
18622   orig_pq = GetPubclassQualFromPub(the_pub);
18623 
18624   if (new_pq == NULL || orig_pq == NULL || new_pq == orig_pq) {
18625     return FALSE;
18626   }
18627 
18628   if (new_pq->pub_choice == the_pub->choice && new_pq->art_from == orig_pq->art_from) {
18629     /* only thing changing is in-press/published */
18630     if (new_pq->status != orig_pq->status) {
18631       rval = SetPubStatusOnPub(the_pub, new_pq->status);
18632     }
18633   } else {
18634     MemSet (&new_pub, 0, sizeof (ValNode));
18635     new_pub.choice = new_pq->pub_choice;
18636     NewPubDataForConversion(&new_pub, new_pq->art_from);
18637 
18638     palp = GetAuthListForPub(the_pub);
18639     new_palp = GetAuthListForPub(&new_pub);
18640     if (palp && *palp && new_palp) {
18641       *new_palp = AsnIoMemCopy (*palp, (AsnReadFunc) AuthListAsnRead, (AsnWriteFunc) AuthListAsnWrite);
18642     }
18643     /* TODO: Copy over other relevant details */
18644     CopyRelevantPubDetails(the_pub, &new_pub);
18645 
18646     SetPubStatusOnPub(&new_pub, new_pq->status);
18647     rval = FreePubDataForConversion(the_pub);
18648     if (rval) {
18649       the_pub->choice = new_pub.choice;
18650       the_pub->data.ptrvalue = new_pub.data.ptrvalue;
18651     } else {
18652       FreePubDataForConversion(&new_pub);
18653     }
18654 
18655   }
18656 
18657 
18658   return FALSE;
18659 }
18660 
18661 
IsPublicationConstraintEmpty(PublicationConstraintPtr constraint)18662 NLM_EXTERN Boolean IsPublicationConstraintEmpty (PublicationConstraintPtr constraint)
18663 {
18664   Boolean rval = FALSE;
18665 
18666   if (constraint == NULL
18667       || (constraint->type == Pub_type_any
18668           && (constraint->field == NULL
18669               || IsStringConstraintEmpty (constraint->field->constraint))
18670           && (constraint->special_field == NULL
18671               || constraint->special_field->constraint == NULL))) {
18672     rval = TRUE;
18673   }
18674   return rval;
18675 }
18676 
18677 
GetPubMLStatus(PubPtr the_pub)18678 NLM_EXTERN Int4 GetPubMLStatus (PubPtr the_pub)
18679 {
18680   CitGenPtr  cgp;
18681   CitSubPtr  csp;
18682   CitArtPtr  cap;
18683   CitBookPtr cbp;
18684   CitJourPtr cjp;
18685   ImprintPtr imp = NULL;
18686   Int4       status = Pub_type_any;
18687 
18688   if (the_pub == NULL || the_pub->data.ptrvalue == NULL)
18689   {
18690     return Pub_type_any;
18691   }
18692 
18693   switch (the_pub->choice)
18694   {
18695     case PUB_Gen :
18696       cgp = (CitGenPtr) the_pub->data.ptrvalue;
18697       if (cgp->cit != NULL && StringICmp (cgp->cit, "unpublished") == 0)
18698       {
18699         status = Pub_type_unpublished;
18700       }
18701       else
18702       {
18703         status = Pub_type_published;
18704       }
18705       break;
18706     case PUB_Sub :
18707       csp = (CitSubPtr) the_pub->data.ptrvalue;
18708       status = Pub_type_submitter_block;
18709       break;
18710     case PUB_Article :
18711       cap = (CitArtPtr) the_pub->data.ptrvalue;
18712       if (cap->from == 1)
18713       {
18714         cjp = (CitJourPtr) cap->fromptr;
18715         if (cjp != NULL)
18716         {
18717           imp = cjp->imp;
18718         }
18719       }
18720       else if (cap->from == 2 || cap->from == 3)
18721       {
18722         cbp = (CitBookPtr) cap->fromptr;
18723         if (cbp != NULL) {
18724           imp = cbp->imp;
18725         }
18726       }
18727       break;
18728     case PUB_Journal :
18729       cjp = (CitJourPtr) the_pub->data.ptrvalue;
18730       imp = cjp->imp;
18731     case PUB_Book :
18732     case PUB_Man :
18733       cbp = (CitBookPtr) the_pub->data.ptrvalue;
18734       imp = cbp->imp;
18735       break;
18736     case PUB_Patent :
18737       status = Pub_type_published;
18738       break;
18739     default :
18740       break;
18741 
18742   }
18743   if (imp != NULL)
18744   {
18745     if (imp->prepub == 0)
18746     {
18747       status = Pub_type_published;
18748     }
18749     else if (imp->prepub == 2)
18750     {
18751       status = Pub_type_in_press;
18752     }
18753     else if (imp->prepub == 1 && the_pub->choice == PUB_Sub)
18754     {
18755       status = Pub_type_submitter_block;
18756     }
18757     else
18758     {
18759       status = Pub_type_unpublished;
18760     }
18761 
18762   }
18763   return status;
18764 }
18765 
18766 
DoesPubFieldMatch(PubdescPtr pdp,PubFieldConstraintPtr field)18767 static Boolean DoesPubFieldMatch (PubdescPtr pdp, PubFieldConstraintPtr field)
18768 {
18769   Boolean rval = FALSE, match_all = TRUE;
18770   PubPtr pub;
18771   CharPtr tmp;
18772 
18773   if (pdp == NULL) return FALSE;
18774   if (field == NULL) return TRUE;
18775 
18776   if (field->constraint->not_present) {
18777     match_all = TRUE;
18778     for (pub = pdp->pub; pub != NULL && match_all; pub = pub->next) {
18779       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18780       if (!DoesStringMatchConstraint (tmp, field->constraint)) {
18781         match_all = FALSE;
18782       }
18783       tmp = MemFree (tmp);
18784     }
18785     rval = match_all;
18786   } else {
18787     for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) {
18788       tmp = GetPubFieldFromPub (pub, field->field, field->constraint);
18789       if (tmp != NULL) {
18790         rval = TRUE;
18791       }
18792       tmp = MemFree (tmp);
18793     }
18794   }
18795   return rval;
18796 }
18797 
18798 
DoesPubFieldSpecialMatch(PubdescPtr pdp,PubFieldSpecialConstraintPtr field)18799 static Boolean DoesPubFieldSpecialMatch (PubdescPtr pdp, PubFieldSpecialConstraintPtr field)
18800 {
18801   Boolean rval = FALSE;
18802   PubPtr pub;
18803   CharPtr tmp;
18804 
18805   if (pdp == NULL) return FALSE;
18806   if (field == NULL) return TRUE;
18807 
18808   if (field->constraint->choice == PubFieldSpecialConstraintType_is_present) {
18809     for (pub = pdp->pub; pub != NULL && !rval; pub = pub->next) {
18810       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18811       if (!StringHasNoText (tmp)) {
18812         /* at least one is present and non-empty */
18813         rval = TRUE;
18814       }
18815       tmp = MemFree (tmp);
18816     }
18817   } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_not_present) {
18818     rval = TRUE;
18819     for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
18820       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18821       if (!StringHasNoText (tmp)) {
18822         /* at least one is present and non-empty */
18823         rval = FALSE;
18824       }
18825       tmp = MemFree (tmp);
18826     }
18827   } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_caps) {
18828     rval = TRUE;
18829     for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
18830       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18831       if (tmp != NULL && !IsAllCaps (tmp)) {
18832         /* at least one is not all caps */
18833         rval = FALSE;
18834       }
18835       tmp = MemFree (tmp);
18836     }
18837   } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_lower) {
18838     rval = TRUE;
18839     for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
18840       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18841       if (tmp != NULL && !IsAllLowerCase (tmp)) {
18842         /* at least one is not all caps */
18843         rval = FALSE;
18844       }
18845       tmp = MemFree (tmp);
18846     }
18847   } else if (field->constraint->choice == PubFieldSpecialConstraintType_is_all_punct) {
18848     rval = TRUE;
18849     for (pub = pdp->pub; pub != NULL && rval; pub = pub->next) {
18850       tmp = GetPubFieldFromPub (pub, field->field, NULL);
18851       if (tmp != NULL && !IsAllPunctuation (tmp)) {
18852         /* at least one is not all punctuation */
18853         rval = FALSE;
18854       }
18855       tmp = MemFree (tmp);
18856     }
18857   }
18858 
18859   return rval;
18860 }
18861 
18862 
DoesPubMatchPublicationConstraint(PubdescPtr pdp,PublicationConstraintPtr constraint)18863 static Boolean DoesPubMatchPublicationConstraint (PubdescPtr pdp, PublicationConstraintPtr constraint)
18864 {
18865   Boolean type_ok = TRUE, rval = FALSE;
18866   PubPtr pub;
18867 
18868   if (pdp == NULL) return FALSE;
18869   if (IsPublicationConstraintEmpty (constraint)) return TRUE;
18870 
18871   if (constraint->type != Pub_type_any) {
18872     type_ok = FALSE;
18873     for (pub = pdp->pub; pub != NULL && !type_ok; pub = pub->next) {
18874       if (GetPubMLStatus (pub) == constraint->type) {
18875         type_ok = TRUE;
18876       }
18877     }
18878   }
18879   if (type_ok) {
18880     rval = (constraint->field == NULL || DoesPubFieldMatch (pdp, constraint->field))
18881            && (constraint->special_field == NULL || DoesPubFieldSpecialMatch (pdp, constraint->special_field));
18882   }
18883   return rval;
18884 }
18885 
18886 
DoesObjectMatchPublicationConstraint(Uint1 choice,Pointer data,PublicationConstraintPtr constraint)18887 static Boolean DoesObjectMatchPublicationConstraint (Uint1 choice, Pointer data, PublicationConstraintPtr constraint)
18888 {
18889   Boolean     rval = TRUE;
18890   SeqFeatPtr  sfp;
18891   SeqDescrPtr sdp;
18892 
18893   if (data == NULL) return FALSE;
18894   if (IsPublicationConstraintEmpty (constraint)) return TRUE;
18895 
18896   switch (choice) {
18897     case OBJ_SEQFEAT:
18898       sfp = (SeqFeatPtr) data;
18899       if (sfp->data.choice == SEQFEAT_PUB) {
18900         rval = DoesPubMatchPublicationConstraint (sfp->data.value.ptrvalue, constraint);
18901       }
18902       break;
18903     case OBJ_SEQDESC:
18904       sdp = (SeqDescrPtr) data;
18905       if (sdp->choice == Seq_descr_pub) {
18906         rval = DoesPubMatchPublicationConstraint (sdp->data.ptrvalue, constraint);
18907       }
18908       break;
18909   }
18910   return rval;
18911 }
18912 
18913 
IsFieldConstraintEmpty(FieldConstraintPtr constraint)18914 NLM_EXTERN Boolean IsFieldConstraintEmpty (FieldConstraintPtr constraint)
18915 {
18916   RnaQualPtr rq;
18917   FeatureFieldPtr ffp;
18918 
18919   if (constraint == NULL || constraint->field == NULL || IsStringConstraintEmpty (constraint->string_constraint)) {
18920     return TRUE;
18921   } else if (constraint->field->choice == FieldType_rna_field
18922              && ((rq = (RnaQualPtr)constraint->field->data.ptrvalue) == NULL
18923                  || rq->type == NULL)) {
18924     return TRUE;
18925   } else if (constraint->field->choice == FieldType_feature_field
18926     && (ffp = (FeatureFieldPtr)constraint->field->data.ptrvalue) == NULL) {
18927     return TRUE;
18928   } else {
18929     return FALSE;
18930   }
18931 }
18932 
18933 
DoesObjectMatchFeatureFieldConstraint(Uint1 choice,Pointer data,FeatureFieldPtr ffp,StringConstraintPtr string_constraint)18934 static Boolean DoesObjectMatchFeatureFieldConstraint (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr string_constraint)
18935 {
18936   Boolean           rval = FALSE;
18937   CharPtr           str;
18938   BioseqPtr         bsp;
18939   Int4              subtype;
18940   SeqFeatPtr        sfp;
18941   SeqMgrFeatContext fcontext;
18942   Boolean           not_present;
18943   CGPSetPtr         cgp;
18944   Uint2             cds_gene_prot_field;
18945 
18946   if (data == NULL) {
18947     return FALSE;
18948   }
18949   if (IsStringConstraintEmpty (string_constraint)) {
18950     return TRUE;
18951   }
18952 
18953   switch (choice) {
18954     case OBJ_SEQFEAT:
18955       not_present = string_constraint->not_present;
18956       string_constraint->not_present = FALSE;
18957       str = GetQualFromFeature ((SeqFeatPtr) data, ffp, string_constraint);
18958       if (str != NULL) {
18959         rval = TRUE;
18960         str = MemFree (str);
18961       }
18962       if (not_present) {
18963         rval = !rval;
18964         string_constraint->not_present = TRUE;
18965       }
18966       break;
18967     case OBJ_SEQDESC:
18968     case OBJ_BIOSEQ:
18969       bsp = GetSequenceForObject (choice, data);
18970       if (bsp != NULL) {
18971         subtype = GetFeatdefFromFeatureType (ffp->type);
18972         not_present = string_constraint->not_present;
18973         string_constraint->not_present = FALSE;
18974         for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext);
18975               !rval && sfp != NULL;
18976               sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) {
18977           str = GetQualFromFeature (sfp, ffp, string_constraint);
18978           if (str != NULL) {
18979             rval = TRUE;
18980             str = MemFree (str);
18981           }
18982         }
18983         if (not_present) {
18984           rval = !rval;
18985           string_constraint->not_present = TRUE;
18986         }
18987       }
18988       break;
18989     case 0:
18990       cgp = (CGPSetPtr) data;
18991       cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp);
18992       if (cds_gene_prot_field > 0) {
18993         not_present = string_constraint->not_present;
18994         string_constraint->not_present = FALSE;
18995         str = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, string_constraint);
18996         if (str != NULL) {
18997           rval = TRUE;
18998           str = MemFree (str);
18999         }
19000         if (not_present) {
19001           rval = !rval;
19002           string_constraint->not_present = TRUE;
19003         }
19004       }
19005       break;
19006   }
19007   return rval;
19008 }
19009 
19010 
DoesObjectMatchRnaQualConstraint(Uint1 choice,Pointer data,RnaQualPtr rq,StringConstraintPtr string_constraint)19011 static Boolean DoesObjectMatchRnaQualConstraint (Uint1 choice, Pointer data, RnaQualPtr rq, StringConstraintPtr string_constraint)
19012 {
19013   Boolean           rval = FALSE;
19014   CharPtr           str;
19015   BioseqPtr         bsp;
19016   Int4              subtype;
19017   SeqFeatPtr        sfp;
19018   SeqMgrFeatContext fcontext;
19019   Boolean           not_present;
19020   Uint1             feat_choice = 0;
19021 
19022   if (data == NULL) {
19023     return FALSE;
19024   }
19025   if (IsStringConstraintEmpty (string_constraint)) {
19026     return TRUE;
19027   }
19028 
19029   switch (choice) {
19030     case OBJ_SEQFEAT:
19031       not_present = string_constraint->not_present;
19032       string_constraint->not_present = FALSE;
19033       str = GetRNAQualFromFeature ((SeqFeatPtr) data,  rq, string_constraint, NULL);
19034       if (str != NULL) {
19035         rval = TRUE;
19036         str = MemFree (str);
19037       }
19038       if (not_present) {
19039         rval = !rval;
19040         string_constraint->not_present = TRUE;
19041       }
19042       break;
19043     case OBJ_SEQDESC:
19044     case OBJ_BIOSEQ:
19045       bsp = GetSequenceForObject (choice, data);
19046       if (bsp != NULL) {
19047         if (rq->type == NULL || rq->type->choice == RnaFeatType_any) {
19048           feat_choice = SEQFEAT_RNA;
19049           subtype = 0;
19050         } else {
19051           feat_choice = 0;
19052           subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice));
19053         }
19054 
19055         not_present = string_constraint->not_present;
19056         string_constraint->not_present = FALSE;
19057         for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext);
19058               !rval && sfp != NULL;
19059               sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) {
19060           str = GetRNAQualFromFeature (sfp, rq, string_constraint, NULL);
19061           if (str != NULL) {
19062             rval = TRUE;
19063             str = MemFree (str);
19064           }
19065         }
19066         if (not_present) {
19067           rval = !rval;
19068           string_constraint->not_present = TRUE;
19069         }
19070       }
19071       break;
19072   }
19073   return rval;
19074 }
19075 
19076 
DoesObjectMatchFieldConstraint(Uint1 choice,Pointer data,FieldConstraintPtr constraint)19077 static Boolean DoesObjectMatchFieldConstraint (Uint1 choice, Pointer data, FieldConstraintPtr constraint)
19078 {
19079   Boolean rval = FALSE;
19080   BioSourcePtr biop;
19081   BioseqPtr    bsp;
19082   CharPtr      str;
19083   FeatureFieldPtr ffp;
19084 
19085   if (data == NULL) return FALSE;
19086   if (IsFieldConstraintEmpty (constraint)) {
19087     return TRUE;
19088   }
19089 
19090   switch (constraint->field->choice) {
19091     case FieldType_source_qual:
19092       biop = GetBioSourceFromObject (choice, data);
19093       if (biop != NULL) {
19094         str = GetSourceQualFromBioSource (biop, constraint->field->data.ptrvalue, constraint->string_constraint);
19095         if (str != NULL) {
19096           rval = TRUE;
19097           str = MemFree (str);
19098         }
19099       }
19100       break;
19101     case FieldType_feature_field:
19102       ffp = (FeatureFieldPtr) constraint->field->data.ptrvalue;
19103       rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint);
19104       break;
19105     case FieldType_rna_field:
19106       rval = DoesObjectMatchRnaQualConstraint (choice, data, constraint->field->data.ptrvalue, constraint->string_constraint);
19107       break;
19108     case FieldType_cds_gene_prot:
19109       ffp = FeatureFieldFromCDSGeneProtField (constraint->field->data.intvalue);
19110       rval = DoesObjectMatchFeatureFieldConstraint (choice, data, ffp, constraint->string_constraint);
19111       ffp = FeatureFieldFree (ffp);
19112       break;
19113     case FieldType_molinfo_field:
19114       bsp = GetSequenceForObject (choice, data);
19115       if (bsp != NULL) {
19116         str = GetSequenceQualFromBioseq (bsp, constraint->field->data.ptrvalue);
19117         if (str == NULL && constraint->string_constraint->not_present) {
19118           rval = TRUE;
19119         } else if (str != NULL && DoesStringMatchConstraint (str, constraint->string_constraint)) {
19120           rval = TRUE;
19121         }
19122         str = MemFree (str);
19123       }
19124       break;
19125     case FieldType_misc:
19126     case FieldType_dblink:
19127       bsp = GetSequenceForObject (choice, data);
19128       if (bsp != NULL) {
19129         str = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, constraint->field, constraint->string_constraint, NULL);
19130         if (str != NULL) {
19131           rval = TRUE;
19132         }
19133         str = MemFree (str);
19134       }
19135       break;
19136 
19137 /* TODO LATER */
19138     case FieldType_pub:
19139       break;
19140   }
19141   return rval;
19142 }
19143 
19144 
GetFeatureFieldFromObject(Uint1 choice,Pointer data,FeatureFieldPtr ffp,StringConstraintPtr scp)19145 static CharPtr GetFeatureFieldFromObject (Uint1 choice, Pointer data, FeatureFieldPtr ffp, StringConstraintPtr scp)
19146 {
19147   CharPtr           rval = NULL;
19148   BioseqPtr         bsp;
19149   CGPSetPtr         cgp;
19150   SeqFeatPtr        sfp;
19151   SeqMgrFeatContext fcontext;
19152   Int4              subtype;
19153   Uint2             cds_gene_prot_field;
19154 
19155   if (ffp == NULL || data == NULL) {
19156     return NULL;
19157   }
19158   switch (choice) {
19159     case OBJ_SEQFEAT:
19160       rval = GetQualFromFeature ((SeqFeatPtr) data, ffp, scp);
19161       break;
19162     case OBJ_SEQDESC:
19163     case OBJ_BIOSEQ:
19164       bsp = GetSequenceForObject (choice, data);
19165       if (bsp != NULL) {
19166         subtype = GetFeatdefFromFeatureType (ffp->type);
19167         for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext);
19168               rval == NULL && sfp != NULL;
19169               sfp = SeqMgrGetNextFeature (bsp, sfp, 0, subtype, &fcontext)) {
19170           rval = GetQualFromFeature (sfp, ffp, scp);
19171         }
19172       }
19173       break;
19174     case 0:
19175       cgp = (CGPSetPtr) data;
19176       cds_gene_prot_field = CDSGeneProtFieldFromFeatureField (ffp);
19177       if (cds_gene_prot_field > 0) {
19178         rval = GetFieldValueFromCGPSet (cgp, cds_gene_prot_field, scp);
19179       }
19180       break;
19181   }
19182   return rval;
19183 }
19184 
19185 
GetConstraintFieldFromObject(Uint1 choice,Pointer data,ValNodePtr field,StringConstraintPtr scp)19186 static CharPtr GetConstraintFieldFromObject (Uint1 choice, Pointer data, ValNodePtr field, StringConstraintPtr scp)
19187 {
19188   BioSourcePtr  biop;
19189   BioseqPtr     bsp;
19190   SeqFeatPtr    sfp;
19191   SeqMgrFeatContext fcontext;
19192   Int4              subtype;
19193   FeatureFieldPtr   ffp;
19194   RnaQualPtr        rq;
19195   Uint1             feat_choice = 0;
19196   CharPtr rval = NULL;
19197 
19198   if (data == NULL || field == NULL) {
19199     return NULL;
19200   }
19201 
19202   switch (field->choice) {
19203     case FieldType_source_qual:
19204       biop = GetBioSourceFromObject (choice, data);
19205       if (biop != NULL) {
19206         rval = GetSourceQualFromBioSource (biop, field->data.ptrvalue, scp);
19207       }
19208       break;
19209     case FieldType_feature_field:
19210       rval = GetFeatureFieldFromObject(choice, data, (FeatureFieldPtr) field->data.ptrvalue, scp);
19211       break;
19212     case FieldType_rna_field:
19213       rq = (RnaQualPtr) field->data.ptrvalue;
19214       switch (choice) {
19215         case OBJ_SEQFEAT:
19216           rval = GetRNAQualFromFeature ((SeqFeatPtr) data,  rq, scp, NULL);
19217           break;
19218         case OBJ_SEQDESC:
19219         case OBJ_BIOSEQ:
19220           bsp = GetSequenceForObject (choice, data);
19221           if (bsp != NULL) {
19222             if (rq->type == NULL || rq->type->choice == RnaFeatType_any) {
19223               feat_choice = SEQFEAT_RNA;
19224               subtype = 0;
19225             } else {
19226               feat_choice = 0;
19227               subtype = GetFeatdefFromFeatureType(GetFeatureTypeForRnaType(rq->type->choice));
19228             }
19229 
19230             for (sfp = SeqMgrGetNextFeature (bsp, NULL, feat_choice, subtype, &fcontext);
19231                   rval == NULL && sfp != NULL;
19232                   sfp = SeqMgrGetNextFeature (bsp, sfp, feat_choice, subtype, &fcontext)) {
19233               rval = GetRNAQualFromFeature (sfp, rq, scp, NULL);
19234             }
19235           }
19236           break;
19237       }
19238       break;
19239     case FieldType_cds_gene_prot:
19240       ffp = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
19241       rval = GetFeatureFieldFromObject (choice, data, ffp, scp);
19242       ffp = FeatureFieldFree (ffp);
19243       break;
19244     case FieldType_molinfo_field:
19245       bsp = GetSequenceForObject (choice, data);
19246       if (bsp != NULL) {
19247         rval = GetSequenceQualFromBioseq (bsp, field->data.ptrvalue);
19248         if (rval != NULL && scp != NULL && !DoesStringMatchConstraint (rval, scp)) {
19249           rval = MemFree (rval);
19250         }
19251       }
19252       break;
19253     case FieldType_misc:
19254       bsp = GetSequenceForObject (choice, data);
19255       if (bsp != NULL) {
19256         rval = GetFieldValueForObjectEx (OBJ_BIOSEQ, bsp, field, scp, NULL);
19257       }
19258       break;
19259   }
19260 
19261   return rval;
19262 }
19263 
19264 
DoesObjectMatchFieldMissingConstraint(Uint1 choice,Pointer data,ValNodePtr field)19265 static Boolean DoesObjectMatchFieldMissingConstraint(Uint1 choice, Pointer data, ValNodePtr field)
19266 {
19267   Boolean rval = FALSE;
19268   CharPtr str;
19269 
19270   if (data == NULL || field == NULL) return FALSE;
19271 
19272   str = GetConstraintFieldFromObject(choice, data, field, NULL);
19273   if (str == NULL) {
19274     rval = TRUE;
19275   }
19276   str = MemFree (str);
19277   return rval;
19278 }
19279 
19280 
IsMolinfoFieldConstraintEmpty(MolinfoFieldConstraintPtr constraint)19281 NLM_EXTERN Boolean IsMolinfoFieldConstraintEmpty (MolinfoFieldConstraintPtr constraint)
19282 {
19283   if (constraint == NULL || constraint->field == NULL) {
19284     return TRUE;
19285   } else {
19286     return FALSE;
19287   }
19288 }
19289 
19290 
DoesObjectMatchMolinfoFieldConstraint(Uint1 choice,Pointer data,MolinfoFieldConstraintPtr constraint)19291 static Boolean DoesObjectMatchMolinfoFieldConstraint (Uint1 choice, Pointer data, MolinfoFieldConstraintPtr constraint)
19292 {
19293   BioseqPtr bsp;
19294   MolInfoPtr mip;
19295   Boolean    rval = FALSE;
19296 
19297   bsp = GetSequenceForObject (choice, data);
19298   if (bsp == NULL) {
19299     rval = FALSE;
19300   } else if (IsMolinfoFieldConstraintEmpty(constraint)) {
19301     rval = TRUE;
19302   } else {
19303     mip = GetMolInfoForBioseq (bsp);
19304     rval = FALSE;
19305     switch (constraint->field->choice) {
19306       case MolinfoField_molecule:
19307         if (mip == NULL && constraint->field->data.intvalue == 0) {
19308           rval = TRUE;
19309         } else if (mip != NULL && mip->biomol == BiomolFromMoleculeType (constraint->field->data.intvalue)) {
19310           rval = TRUE;
19311         }
19312         break;
19313       case MolinfoField_technique:
19314         if (mip == NULL && constraint->field->data.intvalue == 0) {
19315           rval = TRUE;
19316         } else if (mip != NULL && mip->tech == TechFromTechniqueType (constraint->field->data.intvalue)) {
19317           rval = TRUE;
19318         }
19319         break;
19320       case MolinfoField_completedness:
19321         if (mip == NULL && constraint->field->data.intvalue == 0) {
19322           rval = TRUE;
19323         } else if (mip != NULL && mip->completeness == CompletenessFromCompletednessType (constraint->field->data.intvalue)) {
19324           rval = TRUE;
19325         }
19326         break;
19327       case MolinfoField_mol_class:
19328         if (bsp->mol == MolFromMoleculeClassType (constraint->field->data.intvalue)) {
19329           rval = TRUE;
19330         }
19331         break;
19332       case MolinfoField_topology:
19333         if (bsp->topology == TopologyFromTopologyType (constraint->field->data.intvalue)) {
19334           rval = TRUE;
19335         }
19336         break;
19337       case MolinfoField_strand:
19338         if (bsp->strand == StrandFromStrandType (constraint->field->data.intvalue)) {
19339           rval = TRUE;
19340         }
19341         break;
19342     }
19343     if (constraint->is_not) {
19344       rval = !rval;
19345     }
19346   }
19347 
19348   return rval;
19349 }
19350 
19351 
DoesCodingRegionMatchTranslationConstraint(SeqFeatPtr sfp,TranslationConstraintPtr constraint)19352 static Boolean DoesCodingRegionMatchTranslationConstraint (SeqFeatPtr sfp, TranslationConstraintPtr constraint)
19353 {
19354   ByteStorePtr        trans_prot = NULL;
19355   BioseqPtr           actual_prot = NULL;
19356   CharPtr             translation = NULL;
19357   Int4                translation_len = 0;
19358   CharPtr             actual = NULL;
19359   Int4                actual_len = 0;
19360   CharPtr             stop, cp1, cp2;
19361   Boolean             rval = TRUE, alt_start = FALSE;
19362   StringConstraintPtr scp;
19363   Int4                pos, comp_len;
19364   Int4                num = 0;
19365 
19366   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
19367     return FALSE;
19368   } else if (constraint == NULL) {
19369     return TRUE;
19370   }
19371 
19372   if (constraint->actual_strings != NULL
19373       || constraint->num_mismatches != NULL) {
19374     actual_prot = BioseqLockById(SeqLocId(sfp->product));
19375     if (actual_prot != NULL) {
19376       actual = (CharPtr) MemNew (sizeof (Char) * (actual_prot->length + 1));
19377       SeqPortStreamInt (actual_prot, 0, actual_prot->length - 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (actual), NULL);
19378       actual_len = StringLen (actual);
19379     }
19380   }
19381 
19382   for (scp = constraint->actual_strings; scp != NULL && rval; scp = scp->next) {
19383     rval = DoesStringMatchConstraint (actual, scp);
19384   }
19385 
19386   if (rval) {
19387     if (constraint->transl_strings != NULL
19388         || constraint->internal_stops != Match_type_constraint_dont_care
19389         || constraint->num_mismatches != NULL) {
19390       trans_prot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, &alt_start, TRUE);   /* include stop codons, do not remove trailing X/B/Z */
19391       if (trans_prot != NULL) {
19392         translation = BSMerge (trans_prot, NULL);
19393         translation_len = StringLen (translation);
19394       }
19395       BSFree (trans_prot);
19396     }
19397     for (scp = constraint->transl_strings; scp != NULL && rval; scp = scp->next) {
19398       rval = DoesStringMatchConstraint (translation, scp);
19399     }
19400 
19401     if (rval && constraint->internal_stops != Match_type_constraint_dont_care) {
19402       stop = StringChr (translation, '*');
19403       if (stop != NULL && stop != translation + translation_len - 1) {
19404         if (constraint->internal_stops == Match_type_constraint_no) {
19405           rval = FALSE;
19406         }
19407       } else {
19408         if (constraint->internal_stops == Match_type_constraint_yes) {
19409           rval = FALSE;
19410         }
19411       }
19412     }
19413   }
19414 
19415   if (rval && constraint->num_mismatches != NULL) {
19416     stop = StringRChr (translation, '*');
19417     if (stop != NULL && stop == translation + translation_len - 1) {
19418       translation_len--;
19419     }
19420     stop = StringRChr (actual, '*');
19421     if (stop != NULL && stop == actual + actual_len - 1) {
19422       actual_len--;
19423     }
19424     if (translation_len > actual_len) {
19425       num = translation_len - actual_len;
19426       comp_len = actual_len;
19427     } else {
19428       num = actual_len - translation_len;
19429       comp_len = translation_len;
19430     }
19431 
19432     cp1 = actual;
19433     cp2 = translation;
19434     if (cp1 != NULL && cp2 != NULL) {
19435       for (pos = 0; pos < comp_len && rval; pos++) {
19436         if (*cp1 != *cp2) {
19437           num++;
19438           if (constraint->num_mismatches->choice == QuantityConstraint_equals
19439               && num > constraint->num_mismatches->data.intvalue) {
19440             rval = FALSE;
19441           } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
19442               && num >= constraint->num_mismatches->data.intvalue) {
19443             rval = FALSE;
19444           }
19445         }
19446         cp1++;
19447         cp2++;
19448       }
19449     }
19450     if (rval) {
19451       if (constraint->num_mismatches->choice == QuantityConstraint_greater_than
19452           && num <= constraint->num_mismatches->data.intvalue) {
19453         rval = FALSE;
19454       } else if (constraint->num_mismatches->choice == QuantityConstraint_equals
19455                  && num != constraint->num_mismatches->data.intvalue) {
19456         rval = FALSE;
19457       } else if (constraint->num_mismatches->choice == QuantityConstraint_less_than
19458                  && num >= constraint->num_mismatches->data.intvalue) {
19459         rval = FALSE;
19460       }
19461     }
19462   }
19463 
19464   if (actual_prot != NULL) {
19465     BioseqUnlock(actual_prot);
19466   }
19467   actual = MemFree (actual);
19468   translation = MemFree (translation);
19469   return rval;
19470 }
19471 
19472 
DoesObjectMatchTranslationConstraint(Uint1 choice,Pointer data,TranslationConstraintPtr constraint)19473 static Boolean DoesObjectMatchTranslationConstraint (Uint1 choice, Pointer data, TranslationConstraintPtr constraint)
19474 {
19475   Boolean rval = FALSE;
19476   SeqFeatPtr sfp = NULL;
19477   BioseqPtr bsp;
19478   SeqMgrFeatContext  context;
19479 
19480   if (data == NULL) {
19481     return FALSE;
19482   } else if (constraint == NULL) {
19483     return TRUE;
19484   }
19485 
19486   switch (choice) {
19487     case OBJ_SEQFEAT:
19488       /* must be coding region or protein feature */
19489       sfp = (SeqFeatPtr) data;
19490       if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
19491         bsp = BioseqFindFromSeqLoc (sfp->location);
19492         sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
19493       }
19494       rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint);
19495       break;
19496     case OBJ_BIOSEQ:
19497       /* must be protein sequence, or nucleotide bioseq with only one coding region */
19498       bsp = data;
19499       if (bsp != NULL) {
19500         if (ISA_aa (bsp->mol)) {
19501           sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
19502         } else {
19503           sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &context);
19504           if (SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &context) != NULL) {
19505             sfp = NULL;
19506           }
19507         }
19508         rval = DoesCodingRegionMatchTranslationConstraint (sfp, constraint);
19509       }
19510       break;
19511   }
19512   return rval;
19513 }
19514 
19515 
DoesObjectMatchConstraint(Uint1 choice,Pointer data,ConstraintChoicePtr constraint)19516 static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint)
19517 {
19518   Boolean rval = TRUE;
19519 
19520   if (data == NULL) return FALSE;
19521   if (constraint == NULL) return TRUE;
19522 
19523   switch (constraint->choice) {
19524     case ConstraintChoice_string :
19525       rval = DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue);
19526       break;
19527     case ConstraintChoice_location :
19528       rval = DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue);
19529       break;
19530     case ConstraintChoice_field :
19531       rval = DoesObjectMatchFieldConstraint (choice, data, constraint->data.ptrvalue);
19532       break;
19533     case ConstraintChoice_source :
19534       rval = DoesBiosourceMatchConstraint (GetBioSourceFromObject (choice, data), constraint->data.ptrvalue);
19535       break;
19536     case ConstraintChoice_cdsgeneprot_qual :
19537       if (choice == 0) {
19538         rval = DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue);
19539       } else if (choice == OBJ_SEQDESC) {
19540         rval = DoesSeqDescMatchCGPQualConstraint (data, constraint->data.ptrvalue);
19541       } else if (choice == OBJ_SEQFEAT) {
19542         rval = DoesFeatureMatchCGPQualConstraint (data, constraint->data.ptrvalue);
19543       } else if (choice == OBJ_BIOSEQ) {
19544         rval = DoesSequenceMatchCGPQualConstraint (data, constraint->data.ptrvalue);
19545       } else {
19546         rval = FALSE;
19547       }
19548       break;
19549     case ConstraintChoice_cdsgeneprot_pseudo :
19550       if (choice == 0) {
19551         rval = DoesCGPSetMatchPseudoConstraint (data, constraint->data.ptrvalue);
19552       } else if (choice == OBJ_SEQFEAT) {
19553         rval = DoesFeatureMatchCGPPseudoConstraint (data, constraint->data.ptrvalue);
19554       }
19555       break;
19556     case ConstraintChoice_sequence :
19557       rval = DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue);
19558       break;
19559     case ConstraintChoice_pub:
19560       rval = DoesObjectMatchPublicationConstraint (choice, data, constraint->data.ptrvalue);
19561       break;
19562     case ConstraintChoice_molinfo:
19563       rval = DoesObjectMatchMolinfoFieldConstraint (choice, data, constraint->data.ptrvalue);
19564       break;
19565     case ConstraintChoice_field_missing:
19566       rval = DoesObjectMatchFieldMissingConstraint (choice, data, constraint->data.ptrvalue);
19567       break;
19568     case ConstraintChoice_translation:
19569       rval = DoesObjectMatchTranslationConstraint (choice, data, constraint->data.ptrvalue);
19570       break;
19571   }
19572   return rval;
19573 }
19574 
19575 
DoesObjectMatchConstraintChoiceSet(Uint1 choice,Pointer data,ConstraintChoiceSetPtr csp)19576 NLM_EXTERN Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp)
19577 {
19578   Boolean rval = TRUE;
19579 
19580   if (data == NULL) return FALSE;
19581 
19582   while (csp != NULL && rval) {
19583     rval = DoesObjectMatchConstraint (choice, data, csp);
19584     csp = csp->next;
19585   }
19586   return rval;
19587 }
19588 
19589 
FindStringConstraintInConstraintSetForField(FieldTypePtr field,ConstraintChoiceSetPtr csp)19590 NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp)
19591 {
19592   StringConstraintPtr scp = NULL;
19593   SourceConstraintPtr source_constraint;
19594   CDSGeneProtQualConstraintPtr cgp_constraint;
19595   PublicationConstraintPtr pub_constraint;
19596   FieldConstraintPtr       field_constraint;
19597   FieldType                ft;
19598 
19599   while (csp != NULL) {
19600     switch (csp->choice) {
19601       case ConstraintChoice_string :
19602         scp = csp->data.ptrvalue;
19603         break;
19604       case ConstraintChoice_source :
19605         source_constraint = (SourceConstraintPtr) csp->data.ptrvalue;
19606         if (source_constraint != NULL && source_constraint->constraint != NULL) {
19607           if (source_constraint->field1 != NULL) {
19608             ft.choice = FieldType_source_qual;
19609             ft.data.ptrvalue = source_constraint->field1;
19610             ft.next = NULL;
19611             if (DoFieldTypesMatch (field, &ft)) {
19612               scp = source_constraint->constraint;
19613             }
19614           }
19615           if (scp == NULL && source_constraint->field2 == NULL) {
19616             ft.choice = FieldType_source_qual;
19617             ft.data.ptrvalue = source_constraint->field2;
19618             ft.next = NULL;
19619             if (DoFieldTypesMatch (field, &ft)) {
19620               scp = source_constraint->constraint;
19621             }
19622           }
19623         }
19624         break;
19625       case ConstraintChoice_cdsgeneprot_qual :
19626         cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue;
19627         if (field->choice == FieldType_cds_gene_prot
19628             && cgp_constraint != NULL && cgp_constraint->constraint != NULL
19629             && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue)
19630                 || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) {
19631           scp = cgp_constraint->constraint;
19632         }
19633         break;
19634       case ConstraintChoice_pub :
19635         pub_constraint = csp->data.ptrvalue;
19636         if (pub_constraint != NULL && pub_constraint->field != NULL) {
19637           if (field->data.intvalue == pub_constraint->field->field
19638               && !IsStringConstraintEmpty (pub_constraint->field->constraint)) {
19639             scp = pub_constraint->field->constraint;
19640           }
19641         }
19642         break;
19643       case ConstraintChoice_field :
19644         field_constraint = csp->data.ptrvalue;
19645         if (field_constraint != NULL
19646             && field_constraint->field != NULL
19647             && DoFieldTypesMatch (field, field_constraint->field)) {
19648           scp = field_constraint->string_constraint;
19649         }
19650         break;
19651     }
19652     csp = csp->next;
19653   }
19654   return scp;
19655 }
19656 
19657 
FindStringConstraintInConstraintSetForFieldPair(FieldPairTypePtr fieldpair,ConstraintChoiceSetPtr csp)19658 NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp)
19659 {
19660   StringConstraintPtr scp;
19661   FieldTypePtr f;
19662 
19663   f = GetFromFieldFromFieldPair (fieldpair);
19664   scp = FindStringConstraintInConstraintSetForField (f, csp);
19665   f = FieldTypeFree (f);
19666   return scp;
19667 }
19668 
19669 
StringConstraintFromFieldEdit(FieldEditPtr edit)19670 NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit)
19671 {
19672   StringConstraintPtr scp;
19673 
19674   if (edit == NULL || edit->find_txt == NULL) return NULL;
19675   scp = StringConstraintNew ();
19676   scp->match_text = StringSave (edit->find_txt);
19677 
19678   switch (edit->location) {
19679     case Field_edit_location_anywhere :
19680       scp->match_location = String_location_contains;
19681       break;
19682     case Field_edit_location_beginning :
19683       scp->match_location = String_location_starts;
19684       break;
19685     case Field_edit_location_end :
19686       scp->match_location = String_location_ends;
19687       break;
19688   }
19689 
19690   scp->case_sensitive = !(edit->case_insensitive);
19691   scp->whole_word = FALSE;
19692   scp->not_present = FALSE;
19693 
19694   return scp;
19695 }
19696 
19697 
/* Returns a copy of str with the find/replace described by edit applied.
 *
 * Occurrences of edit->find_txt are located with StringISearch or
 * StringSearch depending on edit->case_insensitive and replaced by
 * edit->repl_txt.  For Field_edit_location_beginning only an occurrence at
 * the very start of the string is replaced; for Field_edit_location_end
 * only one that ends exactly at the end of the string; otherwise every
 * occurrence is replaced, and searching resumes after the inserted text so
 * replacement text is never rescanned.
 *
 * When edit is NULL the result is simply StringSave (str).  The input
 * string is never modified; the result is a separate buffer produced by
 * StringSave or MemNew.
 */
static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit)
{
  CharPtr cp_found, new_str;
  Int4 found_len, replace_len, new_len;

  if (edit == NULL) return StringSave (str);

  str = StringSave (str);
  if (edit->case_insensitive) {
    cp_found = StringISearch (str, edit->find_txt);
  } else {
    cp_found = StringSearch (str, edit->find_txt);
  }

  found_len = StringLen (edit->find_txt);
  replace_len = StringLen (edit->repl_txt);
  while (cp_found != NULL)
  {
    if (edit->location == Field_edit_location_beginning
        && cp_found != str) {
      /* the first occurrence is not at the start, so nothing qualifies */
      cp_found = NULL;
    } else if (edit->location == Field_edit_location_end
        && cp_found != str + StringLen (str) - found_len) {
      /* Not at the end: try the next candidate.  Advance by a single
       * character rather than by found_len, otherwise an occurrence that
       * overlaps this one and does end the string (e.g. "aa" in "aaa")
       * would be skipped.
       */
      if (edit->case_insensitive) {
        cp_found = StringISearch (cp_found + 1, edit->find_txt);
      } else {
        cp_found = StringSearch (cp_found + 1, edit->find_txt);
      }
    } else {
      new_len = StringLen (str) + 1 - found_len + replace_len;
      new_str = (CharPtr) MemNew (new_len * sizeof (Char));
      if (new_str == NULL)
      {
        /* out of memory: keep what has been edited so far instead of
         * searching again from the same position forever
         */
        break;
      }
      /* new_str = text before the match + replacement + text after it */
      if (cp_found != str)
      {
        StringNCpy (new_str, str, cp_found - str);
      }
      StringCat (new_str, edit->repl_txt);
      StringCat (new_str, cp_found + found_len);
      /* continue just past the inserted replacement in the new buffer */
      cp_found = new_str + (cp_found - str) + replace_len;
      str = MemFree (str);
      str = new_str;

      if (edit->case_insensitive) {
        cp_found = StringISearch (cp_found, edit->find_txt);
      } else {
        cp_found = StringSearch (cp_found, edit->find_txt);
      }
    }
  }
  return str;
}
19750 
19751 
/* Strips a leading field name from str, in place.
 *
 * When str begins with field_name (compared without regard to case)
 * followed by a space, the name and all spaces directly after it are
 * removed by shifting the remainder of the string to the front.  Nothing
 * happens if either argument has no text or the prefix is not present.
 */
static void RemoveFieldNameFromString (CharPtr field_name, CharPtr str)
{
  Uint4 name_len;
  CharPtr from, to;

  if (StringHasNoText (field_name) || StringHasNoText (str)) {
    return;
  }

  name_len = StringLen (field_name);
  if (StringNICmp (str, field_name, name_len) != 0) {
    return;
  }
  if (!(StringLen (str) > name_len) || str[name_len] != ' ') {
    return;
  }

  /* step over the name, its separating space and any further spaces */
  from = str + name_len + 1;
  while (*from == ' ')
  {
    from++;
  }

  /* slide the rest of the text down to the beginning of the buffer */
  for (to = str; *from != 0; to++, from++)
  {
    *to = *from;
  }
  *to = 0;
}
19781 
19782 
/* State handed to AECRActionObjectCollectionItemCallback while it gathers
 * the objects that an AECR action should be applied to.
 */
typedef struct objectcollection {
  AECRActionPtr action;       /* action being evaluated; the callback reads its action choice and constraint set */
  ValNodePtr object_list;     /* head of the list of collected objects (choice = object type, pointer = object) */
  ValNodePtr object_tail;     /* tail pointer passed to ValNodeAddPointerEx together with object_list */
  BatchExtraPtr batch_extra;  /* passed through unchanged to GetFieldValueForObjectEx */
} ObjectCollectionData, PNTR ObjectCollectionPtr;
19789 
19790 
/* AECRActionObjectCollectionItemCallback
 *
 * Decides whether one object is a target of the AECR action held in
 * o->action and, if it is, appends it to o->object_list (o->object_tail is
 * passed along so ValNodeAddPointerEx can append at the end).
 *
 *   objecttype : OBJ_xxx tag describing what objectdata points to
 *   objectdata : candidate object; nothing happens when it is NULL
 *   o          : collection state (action, result list/tail, batch_extra)
 *
 * Every action kind requires the object to be appropriate for the field(s)
 * of the action and to satisfy o->action->constraint.  Edit, convert and
 * parse additionally require that the text they would work on is not blank.
 */
static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer objectdata, ObjectCollectionPtr o)
{
  ApplyActionPtr a;
  EditActionPtr e;
  ConvertActionPtr v;
  CopyActionPtr c;
  SwapActionPtr s;
  RemoveActionPtr r;
  AECRParseActionPtr p;
  RemoveOutsideActionPtr ro;
  CharPtr str, portion, field_name;
  StringConstraintPtr scp;
  FieldTypePtr field_from = NULL, field_to = NULL;

  if (objectdata == NULL || o == NULL) return;

  /* an AECR action without an inner action has nothing to match against */
  if (o->action == NULL || o->action->action == NULL) return;

  /* check to make sure object is appropriate for field and meets filter */
  switch (o->action->action->choice) {
    case ActionChoice_apply :
      a = (ApplyActionPtr) o->action->action->data.ptrvalue;
      if (a != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
      }
      break;
    case ActionChoice_edit :
      e = (EditActionPtr) o->action->action->data.ptrvalue;
      if (e != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        /* only keep objects that actually have text for the edit to find.
         * NOTE(review): if StringConstraintFromFieldEdit returns a newly
         * allocated constraint it is never released here - confirm ownership.
         */
        scp = StringConstraintFromFieldEdit (e->edit);
        str = GetFieldValueForObjectEx (objecttype, objectdata, e->field, scp, o->batch_extra);
        if (!StringHasNoText (str)) {
          ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
        }
        str = MemFree (str);
      }
      break;
    case ActionChoice_remove_outside :
      ro = (RemoveOutsideActionPtr) o->action->action->data.ptrvalue;
      if (ro != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, ro->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
      }
      break;
    case ActionChoice_convert :
      v = (ConvertActionPtr) o->action->action->data.ptrvalue;
      if (v != NULL
          && (field_from = GetFromFieldFromFieldPair(v->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
        str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra);
        if (v->strip_name) {
          /* the destination field name is stripped from the value before
           * testing whether any text is left to convert
           */
          field_to = GetToFieldFromFieldPair (v->fields);
          field_name = SummarizeFieldType (field_to);
          RemoveFieldNameFromString (field_name, str);
          field_name = MemFree (field_name);
          field_to = FieldTypeFree (field_to);
        }
        if (!StringHasNoText (str)) {
          ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
        }
        str = MemFree (str);
      }
      field_from = FieldTypeFree (field_from);
      break;
    case ActionChoice_copy :
      c = (CopyActionPtr) o->action->action->data.ptrvalue;
      /* field_to must come from the "to" half of the pair; otherwise the
       * destination check below would just repeat the source check
       */
      if (c != NULL
          && (field_from = GetFromFieldFromFieldPair(c->fields)) != NULL
          && (field_to = GetToFieldFromFieldPair(c->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_swap :
      s = (SwapActionPtr) o->action->action->data.ptrvalue;
      if (s != NULL
          && (field_from = GetFromFieldFromFieldPair(s->fields)) != NULL
          && (field_to = GetToFieldFromFieldPair(s->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_remove :
      r = (RemoveActionPtr) o->action->action->data.ptrvalue;
      if (r != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
      }
      break;
    case ActionChoice_parse :
      p = (AECRParseActionPtr) o->action->action->data.ptrvalue;
      if (p != NULL
          && (field_from = GetFromFieldFromFieldPair(p->fields)) != NULL
          && (field_to = GetToFieldFromFieldPair(p->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        /* keep the object only if the requested portion, after the text
         * transforms are applied, still contains text
         */
        scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
        str = GetFieldValueForObjectEx (objecttype, objectdata, field_from, scp, o->batch_extra);
        portion = GetTextPortionFromString (str, p->portion);
        ApplyTextTransformsToString (&portion, p->transform);
        if (!StringHasNoText (portion)) {
          ValNodeAddPointerEx (&(o->object_list), &(o->object_tail), objecttype, objectdata);
        }
        portion = MemFree (portion);
        str = MemFree (str);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    default :
      /* other action kinds never select objects here */
      break;
  }

}
19918 
19919 
/* Feature visitor (see VisitFeaturesInSep in GetObjectListForAECRActionEx):
 * forwards the feature to the shared item callback tagged as OBJ_SEQFEAT.
 * data is the ObjectCollectionData being filled; NULL arguments are ignored.
 */
static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, (ObjectCollectionPtr) data);
  }
}
19929 
19930 
/* Descriptor visitor (see VisitDescriptorsInSep in
 * GetObjectListForAECRActionEx): forwards the descriptor to the shared item
 * callback tagged as OBJ_SEQDESC.  data is the ObjectCollectionData being
 * filled; NULL arguments are ignored.
 */
static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data)
{
  if (sdp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, (ObjectCollectionPtr) data);
  }
}
19940 
19941 
/* Bioseq visitor (see VisitBioseqsInSep in GetObjectListForAECRActionEx):
 * forwards the Bioseq to the shared item callback tagged as OBJ_BIOSEQ.
 * data is the ObjectCollectionData being filled; NULL arguments are ignored.
 */
static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, (ObjectCollectionPtr) data);
  }
}
19951 
19952 
/* CollectMiscObjectsForApply
 *
 * Builds the list of objects an apply action on a "misc" field should touch.
 *
 *   sep        : entry to search; NULL yields NULL
 *   misc_type  : one of the Misc_field_xxx values
 *   constraint : constraint set each Bioseq must satisfy
 *
 * The Bioseqs returned by CollectNucBioseqs are filtered by the constraint.
 * For Misc_field_genome_project_id the filtered Bioseq list itself is
 * returned; for comment descriptor, defline and keyword the matching
 * AddXxxDestinationsForBioseq helper fills the result from each Bioseq.
 * Any other misc_type returns NULL.
 */
static ValNodePtr CollectMiscObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint)
{
  ValNodePtr nuc_list, node;
  ValNodePtr matched = NULL, matched_tail = NULL;
  ValNodePtr result = NULL;

  if (sep == NULL) {
    return NULL;
  }

  /* keep only the Bioseqs that satisfy the constraint */
  nuc_list = CollectNucBioseqs (sep);
  node = nuc_list;
  while (node != NULL) {
    if (DoesObjectMatchConstraintChoiceSet (node->choice, node->data.ptrvalue, constraint)) {
      ValNodeAddPointerEx (&matched, &matched_tail, node->choice, node->data.ptrvalue);
    }
    node = node->next;
  }
  nuc_list = ValNodeFree (nuc_list);

  if (misc_type == Misc_field_genome_project_id) {
    /* the Bioseqs are the targets: hand the filtered list over as-is */
    result = matched;
    matched = NULL;
  } else if (misc_type == Misc_field_comment_descriptor) {
    for (node = matched; node != NULL; node = node->next) {
      AddCommentDescriptorDestinationsForBioseq (node->data.ptrvalue, &result);
    }
  } else if (misc_type == Misc_field_defline) {
    for (node = matched; node != NULL; node = node->next) {
      AddDeflineDescriptorDestinationsForBioseq (node->data.ptrvalue, &result);
    }
  } else if (misc_type == Misc_field_keyword) {
    for (node = matched; node != NULL; node = node->next) {
      AddGenbankBlockDescriptorDestinationsForBioseq (node->data.ptrvalue, &result);
    }
  }

  /* matched is NULL here when ownership moved to result */
  matched = ValNodeFree (matched);
  return result;
}
19991 
19992 
/* AddDBLinkDescriptorDestinationsForBioseq
 *
 * Appends to *dest_list (tagged OBJ_SEQDESC) every user descriptor on bsp
 * whose user object passes IsUserObjectDBLink.  When none is found a new
 * user descriptor holding the result of CreateDBLinkUserObject is created
 * on bsp, flagged idx.deleteme, and appended instead.
 *
 *   bsp       : Bioseq to inspect; NULL is ignored
 *   dest_list : list to append to; NULL is ignored
 */
static void AddDBLinkDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqDescrPtr sdp;
  UserObjectPtr uop;
  SeqMgrDescContext context;
  Boolean found = FALSE;
  ObjValNodePtr ovp;

  if (bsp == NULL || dest_list == NULL) {
    return;
  }

  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
    if ((uop = sdp->data.ptrvalue) != NULL
        && IsUserObjectDBLink (uop)) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
      found = TRUE;
    }
  }
  if (found) {
    return;
  }

  /* if no existing DBLink descriptor, create one, marked for delete.
   * unmark it for deletion when it gets populated.
   */
  sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
  if (sdp == NULL) {
    /* nothing was created, so there is no destination to offer */
    return;
  }
  sdp->data.ptrvalue = CreateDBLinkUserObject ();
  ovp = (ObjValNodePtr) sdp;
  ovp->idx.deleteme = TRUE;
  ValNodeAddPointer (dest_list, OBJ_SEQDESC, sdp);
}
20025 
20026 
/* CollectDBLinkObjectsForApply
 *
 * Returns the DBLink descriptor destinations for every Bioseq returned by
 * CollectNucBioseqs that satisfies constraint.
 *
 *   sep        : entry to search; NULL yields NULL
 *   misc_type  : not used by this function
 *   constraint : constraint set each Bioseq must satisfy
 */
static ValNodePtr CollectDBLinkObjectsForApply (SeqEntryPtr sep, Int4 misc_type, ValNodePtr constraint)
{
  ValNodePtr destinations = NULL;
  ValNodePtr nuc_list, node;

  if (sep == NULL) {
    return NULL;
  }

  nuc_list = CollectNucBioseqs (sep);
  node = nuc_list;
  while (node != NULL) {
    if (DoesObjectMatchConstraintChoiceSet (node->choice, node->data.ptrvalue, constraint)) {
      AddDBLinkDescriptorDestinationsForBioseq (node->data.ptrvalue, &destinations);
    }
    node = node->next;
  }
  nuc_list = ValNodeFree (nuc_list);

  return destinations;
}
20046 
20047 
/* AddStructuredCommentDescriptorDestinationsForBioseq
 *
 * Appends to *dest_list (tagged OBJ_SEQDESC) every user descriptor on bsp
 * whose user object passes IsUserObjectStructuredComment.  When none is
 * found a new user descriptor with a user object of type
 * "StructuredComment" is created on bsp, flagged idx.deleteme, and appended
 * instead.
 *
 *   bsp       : Bioseq to inspect; NULL is ignored
 *   dest_list : head of the list to append to; NULL is ignored
 *   dest_tail : tail pointer handed to ValNodeAddPointerEx; NULL is ignored
 */
static void AddStructuredCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list, ValNodePtr PNTR dest_tail)
{
  SeqDescrPtr sdp;
  SeqMgrDescContext context;
  Boolean found = FALSE;
  ObjValNodePtr ovp;
  UserObjectPtr uop;

  if (bsp == NULL || dest_list == NULL || dest_tail == NULL) {
    return;
  }

  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
    if (IsUserObjectStructuredComment (sdp->data.ptrvalue)) {
      ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp);
      found = TRUE;
    }
  }
  if (found) {
    return;
  }

  /* if no existing structured comment descriptor, create one, marked for delete.
   * unmark it for deletion when it gets populated.
   */
  sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_user);
  if (sdp == NULL) {
    /* nothing was created, so there is no destination to offer */
    return;
  }
  uop = UserObjectNew ();
  uop->type = ObjectIdNew ();
  uop->type->str = StringSave ("StructuredComment");
  sdp->data.ptrvalue = uop;
  ovp = (ObjValNodePtr) sdp;
  ovp->idx.deleteme = TRUE;
  ValNodeAddPointerEx (dest_list, dest_tail, OBJ_SEQDESC, sdp);
}
20082 
20083 
/* CollectStructuredCommentsForApply
 *
 * Returns the structured comment descriptor destinations for every Bioseq
 * returned by CollectNucBioseqs that satisfies constraint.  Filtering is
 * completed for all Bioseqs before any destination is requested, because
 * requesting destinations may create new descriptors.
 *
 *   sep        : entry to search; NULL yields NULL
 *   constraint : constraint set each Bioseq must satisfy
 */
static ValNodePtr CollectStructuredCommentsForApply (SeqEntryPtr sep, ValNodePtr constraint)
{
  ValNodePtr nuc_list, node;
  ValNodePtr matched = NULL, matched_tail = NULL;
  ValNodePtr result = NULL, result_tail = NULL;

  if (sep == NULL) {
    return NULL;
  }

  /* pass 1: pick the Bioseqs that satisfy the constraint */
  nuc_list = CollectNucBioseqs (sep);
  node = nuc_list;
  while (node != NULL) {
    if (DoesObjectMatchConstraintChoiceSet (node->choice, node->data.ptrvalue, constraint)) {
      ValNodeAddPointerEx (&matched, &matched_tail, node->choice, node->data.ptrvalue);
    }
    node = node->next;
  }
  nuc_list = ValNodeFree (nuc_list);

  /* pass 2: gather (or create) the descriptors on each picked Bioseq */
  node = matched;
  while (node != NULL) {
    AddStructuredCommentDescriptorDestinationsForBioseq (node->data.ptrvalue, &result, &result_tail);
    node = node->next;
  }
  matched = ValNodeFree (matched);

  return result;
}
20108 
20109 
GetObjectListForAECRActionEx(SeqEntryPtr sep,AECRActionPtr action,BatchExtraPtr batch_extra)20110 NLM_EXTERN ValNodePtr GetObjectListForAECRActionEx (SeqEntryPtr sep, AECRActionPtr action, BatchExtraPtr batch_extra)
20111 {
20112   ObjectCollectionData ocd;
20113   ApplyActionPtr apply;
20114   Uint1 field_type;
20115 
20116   if (action == NULL) return NULL;
20117 
20118   ocd.action = action;
20119   ocd.object_list = NULL;
20120   ocd.object_tail = NULL;
20121   if (batch_extra == NULL) {
20122     ocd.batch_extra = BatchExtraNew ();
20123     InitBatchExtraForAECRAction (ocd.batch_extra, action, sep);
20124   } else {
20125     ocd.batch_extra = batch_extra;
20126   }
20127 
20128   field_type = FieldTypeFromAECRAction (action);
20129   if (field_type == FieldType_molinfo_field) {
20130     VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback);
20131   } else if (field_type == FieldType_misc
20132              && action->action != NULL
20133              && action->action->choice == ActionChoice_apply
20134              && (apply = action->action->data.ptrvalue) != NULL) {
20135     ocd.object_list = CollectMiscObjectsForApply (sep, apply->field->data.intvalue, action->constraint);
20136   } else if (field_type == FieldType_dblink
20137              && action->action != NULL
20138              && action->action->choice == ActionChoice_apply
20139              && (apply = action->action->data.ptrvalue) != NULL) {
20140     ocd.object_list = CollectDBLinkObjectsForApply (sep, apply->field->data.intvalue, action->constraint);
20141   } else if (field_type == FieldType_struc_comment_field) {
20142     ocd.object_list = CollectStructuredCommentsForApply (sep, action->constraint);
20143   } else {
20144     VisitFeaturesInSep (sep, &ocd, AECRActionObjectCollectionFeatureCallback);
20145     VisitDescriptorsInSep (sep, &ocd, AECRActionObjectCollectionDescriptorCallback);
20146     if (field_type == FieldType_misc) {
20147       VisitBioseqsInSep (sep, &ocd, AECRObjectCollectionBioseqCallback);
20148     }
20149   }
20150 
20151   if (batch_extra != ocd.batch_extra) {
20152     ocd.batch_extra = BatchExtraFree (ocd.batch_extra);
20153   }
20154   return ocd.object_list;
20155 }
20156 
20157 
GetObjectListForAECRAction(SeqEntryPtr sep,AECRActionPtr action)20158 NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action)
20159 {
20160   return GetObjectListForAECRActionEx (sep, action, NULL);
20161 }
20162 
20163 
FreeObjectList(ValNodePtr vnp)20164 NLM_EXTERN ValNodePtr FreeObjectList (ValNodePtr vnp)
20165 {
20166   ValNodePtr vnp_next;
20167 
20168   while (vnp != NULL) {
20169     vnp_next = vnp->next;
20170     vnp->next = NULL;
20171     if (vnp->choice == 0) {
20172       vnp->data.ptrvalue = CGPSetFree (vnp->data.ptrvalue);
20173     }
20174     vnp = ValNodeFree (vnp);
20175     vnp = vnp_next;
20176   }
20177   return vnp;
20178 }
20179 
20180 
20181 typedef struct buildcgpset
20182 {
20183   ValNodePtr cds_list;
20184   ValNodePtr mrna_list;
20185   ValNodePtr gene_list;
20186 } BuildCGPSetData, PNTR BuildCGPSetPtr;
20187 
/* BuildCGPSetCallback
 *
 * Feature visitor that sorts a feature into one of the BuildCGPSetData
 * lists passed as userdata.  Features flagged idx.deleteme are skipped.
 * Coding regions go to cds_list, genes to gene_list, mRNAs to mrna_list;
 * any other feature goes to gene_list only when SeqMgrGetGeneXref finds a
 * gene xref on it.
 */
static void BuildCGPSetCallback (SeqFeatPtr sfp, Pointer userdata)
{
  BuildCGPSetPtr lists;
  ValNodePtr *bucket = NULL;

  if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) return;
  lists = (BuildCGPSetPtr) userdata;

  if (sfp->data.choice == SEQFEAT_CDREGION) {
    bucket = &(lists->cds_list);
  } else if (sfp->data.choice == SEQFEAT_GENE) {
    bucket = &(lists->gene_list);
  } else if (sfp->idx.subtype == FEATDEF_mRNA) {
    bucket = &(lists->mrna_list);
  } else if (SeqMgrGetGeneXref (sfp) != NULL) {
    bucket = &(lists->gene_list);
  }

  if (bucket != NULL) {
    ValNodeAddPointer (bucket, OBJ_SEQFEAT, sfp);
  }
}
20211 
20212 
/* AddProtFeatForCds
 *
 * Creates a new protein feature on protbsp for the given coding region and
 * copies the 5'/3' partialness of the coding region location onto it.
 *
 *   cds     : coding region supplying the partial flags
 *   protbsp : protein Bioseq that receives the new feature
 *
 * Returns the new feature, or NULL when an argument is NULL or the feature
 * could not be created.
 */
static SeqFeatPtr AddProtFeatForCds (SeqFeatPtr cds, BioseqPtr protbsp)
{
  SeqFeatPtr prot;
  Boolean    partial5 = FALSE, partial3 = FALSE;

  if (cds == NULL || protbsp == NULL)
  {
    return NULL;
  }

  prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL);
  if (prot == NULL)
  {
    return NULL;
  }

  /* the ProtRef is allocated only once there is a feature to own it, so
   * nothing is leaked when feature creation fails
   */
  prot->data.value.ptrvalue = ProtRefNew ();
  CheckSeqLocForPartial (cds->location, &partial5, &partial3);
  SetSeqLocPartial (prot->location, partial5, partial3);
  prot->partial = partial5 || partial3;
  return prot;
}
20235 
20236 
BuildCGPSetFromCodingRegion(SeqFeatPtr cds,BoolPtr indexing_needed)20237 static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed)
20238 {
20239   SeqMgrFeatContext fcontext;
20240   SeqFeatPtr        gene = NULL, mrna, prot;
20241   BioseqPtr         protbsp;
20242   CGPSetPtr         cdsp;
20243 
20244   if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL;
20245 
20246   cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData));
20247   ValNodeAddPointer (&(cdsp->cds_list), 0, cds);
20248 
20249   gene = GetGeneForFeature (cds);
20250   if (gene != NULL)
20251   {
20252     ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
20253     /* mark gene, so that we'll know it isn't lonely */
20254     gene->idx.deleteme = TRUE;
20255   }
20256 
20257   mrna = GetmRNAforCDS (cds);
20258   if (mrna != NULL)
20259   {
20260     ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);
20261     /* mark mrna, so that we'll know it's already in a set */
20262     mrna->idx.deleteme = TRUE;
20263   }
20264 
20265   if (cds->product != NULL)
20266   {
20267     protbsp = BioseqFindFromSeqLoc (cds->product);
20268     if (protbsp != NULL)
20269     {
20270       prot = SeqMgrGetBestProteinFeature (protbsp, NULL);
20271       if (prot == NULL) {
20272         prot = GetBestProteinFeatureUnindexed (cds->product);
20273       }
20274 
20275       /* if there is no full-length protein feature, make one */
20276       if (prot == NULL)
20277       {
20278         prot = AddProtFeatForCds (cds, protbsp);
20279         if (prot != NULL)
20280         {
20281           ResynchCDSPartials (cds, NULL);
20282           if (indexing_needed != NULL)
20283           {
20284             *indexing_needed = TRUE;
20285           }
20286         }
20287       }
20288       if (prot != NULL)
20289       {
20290         ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
20291       }
20292 
20293       /* also add in mat_peptides from protein feature */
20294       prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
20295       while (prot != NULL)
20296       {
20297         ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
20298         prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
20299       }
20300     }
20301   }
20302   return cdsp;
20303 }
20304 
20305 
BuildCGPSetFrommRNA(SeqFeatPtr mrna)20306 static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna)
20307 {
20308   SeqFeatPtr        gene;
20309   CGPSetPtr          cdsp;
20310 
20311   if (mrna == NULL || mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) return NULL;
20312 
20313   cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData));
20314   ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);
20315 
20316   gene = GetGeneForFeature (mrna);
20317   if (gene != NULL)
20318   {
20319     ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
20320     /* mark gene, so that we'll know it isn't lonely */
20321     gene->idx.deleteme = TRUE;
20322   }
20323 
20324   return cdsp;
20325 }
20326 
20327 
BuildCGPSetFromGene(SeqFeatPtr gene)20328 static CGPSetPtr BuildCGPSetFromGene (SeqFeatPtr gene)
20329 {
20330   CGPSetPtr          cdsp;
20331 
20332   if (gene == NULL || gene->idx.deleteme || gene->idx.subtype != FEATDEF_GENE) {
20333     return NULL;
20334   }
20335 
20336   cdsp = CGPSetNew ();
20337   ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
20338   return cdsp;
20339 }
20340 
20341 
20342 static void
AdjustCGPObjectListForMatPeptides(ValNodePtr PNTR cgp_list,FieldTypePtr field1,FieldTypePtr field2,ConstraintChoiceSetPtr constraints)20343 AdjustCGPObjectListForMatPeptides
20344 (ValNodePtr PNTR cgp_list,
20345  FieldTypePtr field1,
20346  FieldTypePtr field2,
20347  ConstraintChoiceSetPtr constraints)
20348 {
20349   ConstraintChoiceSetPtr mat_peptide_constraints = NULL;
20350   ValNodePtr vnp, vnp_prev, vnp_next;
20351   ValNodePtr m_vnp, m_vnp_prev, m_vnp_next, mat_peptide_list;
20352   CGPSetPtr  cdsp;
20353   SeqFeatPtr sfp;
20354 
20355   if (cgp_list == NULL
20356       || *cgp_list == NULL
20357       || constraints == NULL
20358       || (field1 == NULL && field2 == NULL) /* no fields specified */
20359       || (!IsFieldTypeMatPeptideRelated (field1) && !IsFieldTypeMatPeptideRelated(field2))) {
20360     return;
20361   }
20362 
20363   /* get list of constraints that apply to mat-peptide features */
20364   while (constraints != NULL) {
20365     if (IsConstraintChoiceMatPeptideRelated (constraints)) {
20366       ValNodeLink (&mat_peptide_constraints, AsnIoMemCopy (constraints, (AsnReadFunc) ConstraintChoiceAsnRead, (AsnWriteFunc) ConstraintChoiceAsnWrite));
20367     }
20368     constraints = constraints->next;
20369   }
20370   if (mat_peptide_constraints == NULL) {
20371     return;
20372   }
20373 
20374   /* if both fields are mat-peptide related, or one is mat-peptide related and the other is NULL,
20375    * convert sets to lists of mat-peptide features
20376    * otherwise just remove mat-peptide features from the prot-list that do not match the constraints.
20377    */
20378   if ((field1 != NULL && !IsFieldTypeMatPeptideRelated (field1))
20379       || (field2 != NULL && !IsFieldTypeMatPeptideRelated (field2))) {
20380     for (vnp = *cgp_list; vnp != NULL; vnp = vnp->next) {
20381       if (vnp->choice == 0) {
20382         cdsp = (CGPSetPtr) vnp->data.ptrvalue;
20383         m_vnp_prev = NULL;
20384         for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp_next) {
20385           m_vnp_next = m_vnp->next;
20386           sfp = m_vnp->data.ptrvalue;
20387           if (sfp == NULL
20388               || (sfp->idx.subtype == FEATDEF_mat_peptide_aa
20389                   && !DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints))) {
20390             if (m_vnp_prev == NULL) {
20391               cdsp->prot_list = m_vnp->next;
20392             } else {
20393               m_vnp_prev->next = m_vnp->next;
20394             }
20395             m_vnp->next = NULL;
20396             m_vnp = ValNodeFree (m_vnp);
20397           } else {
20398             m_vnp_prev = m_vnp;
20399           }
20400         }
20401       }
20402     }
20403   } else {
20404     vnp_prev = NULL;
20405     for (vnp = *cgp_list; vnp != NULL; vnp = vnp_next) {
20406       vnp_next = vnp->next;
20407       if (vnp->choice == 0) {
20408         mat_peptide_list = NULL;
20409         cdsp = (CGPSetPtr) vnp->data.ptrvalue;
20410         for (m_vnp = cdsp->prot_list; m_vnp != NULL; m_vnp = m_vnp->next) {
20411           sfp = m_vnp->data.ptrvalue;
20412           if (sfp->idx.subtype == FEATDEF_mat_peptide_aa
20413               && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, mat_peptide_constraints)) {
20414             ValNodeAddPointer (&mat_peptide_list, OBJ_SEQFEAT, sfp);
20415           }
20416         }
20417         if (mat_peptide_list == NULL) {
20418           if (vnp_prev == NULL) {
20419             *cgp_list = vnp->next;
20420           } else {
20421             vnp_prev->next = vnp->next;
20422           }
20423           vnp->next = NULL;
20424           vnp = FreeObjectList (vnp);
20425         } else {
20426           m_vnp = mat_peptide_list;
20427           while (m_vnp->next != NULL) {
20428             m_vnp = m_vnp->next;
20429           }
20430           if (vnp_prev == NULL) {
20431             *cgp_list = mat_peptide_list;
20432           } else {
20433             vnp_prev->next = mat_peptide_list;
20434           }
20435           m_vnp->next = vnp_next;
20436           vnp_prev = m_vnp;
20437           vnp->next = NULL;
20438           vnp = FreeObjectList (vnp);
20439         }
20440       } else {
20441         vnp_prev = vnp;
20442       }
20443     }
20444   }
20445   mat_peptide_constraints = ConstraintChoiceSetFree (mat_peptide_constraints);
20446 }
20447 
20448 
BuildCGPSetList(Uint2 entityID,AECRActionPtr act,BoolPtr created_protein_features)20449 static ValNodePtr BuildCGPSetList (Uint2 entityID, AECRActionPtr act, BoolPtr created_protein_features)
20450 {
20451   SeqEntryPtr    sep;
20452   BuildCGPSetData b;
20453   CGPSetPtr       cdsp;
20454   ValNodePtr     vnp, vnp_next, vnp_prev;
20455   ValNodePtr     cdset_list = NULL;
20456   SeqFeatPtr     cds, gene, mrna;
20457   Boolean        need_indexing = FALSE;
20458   ApplyActionPtr      a;
20459   EditActionPtr       e;
20460   ConvertActionPtr    c;
20461   CopyActionPtr       cp;
20462   SwapActionPtr       s;
20463   AECRParseActionPtr  pa;
20464   RemoveActionPtr     r;
20465   RemoveOutsideActionPtr ro;
20466   FieldTypePtr        field_from, field_to;
20467 
20468   sep = GetTopSeqEntryForEntityID (entityID);
20469 
20470   b.cds_list = NULL;
20471   b.gene_list = NULL;
20472   b.mrna_list = NULL;
20473 
20474   if (created_protein_features != NULL) {
20475     *created_protein_features = FALSE;
20476   }
20477 
20478   VisitFeaturesInSep (sep, &b, BuildCGPSetCallback);
20479 
20480   /* build cdsets that have coding regions */
20481   for (vnp = b.cds_list; vnp != NULL; vnp = vnp->next)
20482   {
20483     cds = (SeqFeatPtr) vnp->data.ptrvalue;
20484     if (cds == NULL) continue;
20485     cdsp = BuildCGPSetFromCodingRegion (cds, &need_indexing);
20486     if (cdsp != NULL)
20487     {
20488       ValNodeAddPointer (&cdset_list, 0, cdsp);
20489     }
20490   }
20491   if (need_indexing)
20492   {
20493     /* indexing because we have created full-length protein features */
20494     SeqMgrIndexFeatures (entityID, NULL);
20495     if (created_protein_features != NULL) {
20496       *created_protein_features = TRUE;
20497     }
20498   }
20499 
20500   /* build cdsets for mrna features that don't have coding regions */
20501   for (vnp = b.mrna_list; vnp != NULL; vnp = vnp->next)
20502   {
20503     mrna = (SeqFeatPtr) vnp->data.ptrvalue;
20504     if (mrna == NULL || mrna->idx.deleteme) continue;
20505     cdsp = BuildCGPSetFrommRNA (mrna);
20506     if (cdsp != NULL)
20507     {
20508       ValNodeAddPointer (&cdset_list, 0, cdsp);
20509     }
20510   }
20511 
20512   /* build cdsets for lonely genes / features with gene xrefs that are not coding regions or mrnas */
20513   for (vnp = b.gene_list; vnp != NULL; vnp = vnp->next)
20514   {
20515     gene = (SeqFeatPtr) vnp->data.ptrvalue;
20516     if (gene == NULL || gene->idx.deleteme) continue;
20517     cdsp = BuildCGPSetFromGene (gene);
20518     if (cdsp != NULL) {
20519       ValNodeAddPointer (&cdset_list, 0, cdsp);
20520     }
20521   }
20522 
20523   /* now unmark features */
20524   UnmarkFeatureList (b.cds_list);
20525   UnmarkFeatureList (b.mrna_list);
20526   UnmarkFeatureList (b.gene_list);
20527 
20528   b.cds_list = ValNodeFree (b.cds_list);
20529   b.mrna_list = ValNodeFree (b.mrna_list);
20530   b.gene_list = ValNodeFree (b.gene_list);
20531 
20532   /* now remove sets that don't match our choice constraint */
20533   if (act != NULL && act->constraint != NULL) {
20534     vnp_prev = NULL;
20535     for (vnp = cdset_list; vnp != NULL; vnp = vnp_next)
20536     {
20537       vnp_next = vnp->next;
20538       if (!DoesObjectMatchConstraintChoiceSet (0, vnp->data.ptrvalue, act->constraint))
20539       {
20540         if (vnp_prev == NULL)
20541         {
20542           cdset_list = vnp->next;
20543         }
20544         else
20545         {
20546           vnp_prev->next = vnp->next;
20547         }
20548         vnp->next = NULL;
20549         FreeCGPSetList (vnp);
20550       }
20551       else
20552       {
20553         vnp_prev = vnp;
20554       }
20555     }
20556   }
20557 
20558   /* adjust if action fields are mat-peptide specific */
20559   if (act != NULL && act->action != NULL && act->action->data.ptrvalue != NULL) {
20560     switch (act->action->choice) {
20561       case ActionChoice_apply:
20562         a = (ApplyActionPtr) act->action->data.ptrvalue;
20563         AdjustCGPObjectListForMatPeptides (&cdset_list, a->field, NULL, act->constraint);
20564         break;
20565       case ActionChoice_edit:
20566         e = (EditActionPtr) act->action->data.ptrvalue;
20567         AdjustCGPObjectListForMatPeptides (&cdset_list, e->field, NULL, act->constraint);
20568         break;
20569       case ActionChoice_remove_outside:
20570         ro = (RemoveOutsideActionPtr) act->action->data.ptrvalue;
20571         AdjustCGPObjectListForMatPeptides (&cdset_list, ro->field, NULL, act->constraint);
20572         break;
20573       case ActionChoice_convert:
20574         c = (ConvertActionPtr) act->action->data.ptrvalue;
20575         field_from = GetFromFieldFromFieldPair (c->fields);
20576         field_to = GetToFieldFromFieldPair (c->fields);
20577         AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint);
20578         field_from = FieldTypeFree (field_from);
20579         field_to = FieldTypeFree (field_to);
20580         break;
20581       case ActionChoice_copy:
20582         cp = (CopyActionPtr) act->action->data.ptrvalue;
20583         field_from = GetFromFieldFromFieldPair (cp->fields);
20584         field_to = GetToFieldFromFieldPair (cp->fields);
20585         AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint);
20586         field_from = FieldTypeFree (field_from);
20587         field_to = FieldTypeFree (field_to);
20588         break;
20589        case ActionChoice_swap:
20590         s = (SwapActionPtr) act->action->data.ptrvalue;
20591         field_from = GetFromFieldFromFieldPair (s->fields);
20592         field_to = GetToFieldFromFieldPair (s->fields);
20593         AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint);
20594         field_from = FieldTypeFree (field_from);
20595         field_to = FieldTypeFree (field_to);
20596         break;
20597       case ActionChoice_remove:
20598         r = (RemoveActionPtr) act->action->data.ptrvalue;
20599         AdjustCGPObjectListForMatPeptides (&cdset_list, r->field, NULL, act->constraint);
20600         break;
20601       case ActionChoice_parse:
20602         pa = (AECRParseActionPtr) act->action->data.ptrvalue;
20603         field_from = GetFromFieldFromFieldPair (pa->fields);
20604         field_to = GetToFieldFromFieldPair (pa->fields);
20605         AdjustCGPObjectListForMatPeptides (&cdset_list, field_from, field_to, act->constraint);
20606         field_from = FieldTypeFree (field_from);
20607         field_to = FieldTypeFree (field_to);
20608         break;
20609     }
20610   }
20611   return cdset_list;
20612 }
20613 
20614 
AlsoChangeMrnaForObject(Uint1 choice,Pointer data)20615 static Boolean AlsoChangeMrnaForObject (Uint1 choice, Pointer data)
20616 {
20617   CharPtr str;
20618   SeqFeatPtr sfp, mrna, cds;
20619   BioseqPtr prot;
20620   FeatureField f;
20621   Boolean rval = FALSE;
20622 
20623   if (choice == 0) {
20624     str = GetFieldValueFromCGPSet (data, CDSGeneProt_field_prot_name, NULL);
20625     rval = SetFieldValueInCGPSet (data, CDSGeneProt_field_mrna_product, NULL, str, ExistingTextOption_replace_old);
20626     str = MemFree (str);
20627   } else if (choice == OBJ_SEQFEAT) {
20628     sfp = (SeqFeatPtr) data;
20629     if (sfp != NULL) {
20630       if (sfp->data.choice == SEQFEAT_CDREGION) {
20631         mrna = GetmRNAforCDS (sfp);
20632       } else if (sfp->data.choice == SEQFEAT_PROT) {
20633         prot = BioseqFindFromSeqLoc(sfp->location);
20634         cds = SeqMgrGetCDSgivenProduct (prot, NULL);
20635         mrna = GetmRNAforCDS (cds);
20636       }
20637       if (mrna != NULL) {
20638         if (sfp->data.choice == SEQFEAT_CDREGION) {
20639           f.type = Macro_feature_type_cds;
20640         } else {
20641           f.type = Macro_feature_type_prot;
20642         }
20643         f.field = ValNodeNew(NULL);
20644         f.field->next = NULL;
20645         f.field->choice = FeatQualChoice_legal_qual;
20646         f.field->data.intvalue = Feat_qual_legal_product;
20647         str = GetQualFromFeature (sfp, &f, NULL);
20648         f.type = Macro_feature_type_mRNA;
20649         rval = SetQualOnFeature (mrna, &f, NULL, str, ExistingTextOption_replace_old);
20650         str = MemFree (str);
20651         f.field = ValNodeFree (f.field);
20652       }
20653     }
20654   }
20655   return rval;
20656 }
20657 
20658 
DoApplyActionToObjectListEx(ApplyActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp,BatchExtraPtr batch_extra)20659 NLM_EXTERN Int4 DoApplyActionToObjectListEx (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
20660 {
20661   ValNodePtr vnp;
20662   Int4       num_succeed = 0, num_fail = 0;
20663   CharPtr    old_str, new_str;
20664 
20665   if (action == NULL || object_list == NULL) return 0;
20666 
20667   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20668     old_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra);
20669     if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text, batch_extra)) {
20670       new_str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra);
20671       if (StringCmp (old_str, new_str) != 0) {
20672         if (also_change_mrna) {
20673           AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20674         }
20675         num_succeed ++;
20676       }
20677       new_str = MemFree (new_str);
20678     } else {
20679       num_fail++;
20680     }
20681     old_str = MemFree (old_str);
20682   }
20683 
20684   return num_succeed;
20685 }
20686 
20687 
DoApplyActionToObjectList(ApplyActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)20688 NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
20689 {
20690   return DoApplyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL);
20691 }
20692 
20693 
DoEditActionToObjectListEx(EditActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,BatchExtraPtr batch_extra)20694 NLM_EXTERN Int4 DoEditActionToObjectListEx (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra)
20695 {
20696   ValNodePtr vnp;
20697   Int4       num_succeed = 0, num_fail = 0;
20698   StringConstraintPtr scp;
20699   CharPtr    str, new_str;
20700 
20701   if (action == NULL || object_list == NULL) return 0;
20702 
20703   scp = StringConstraintFromFieldEdit (action->edit);
20704 
20705   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20706     str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, batch_extra);
20707     new_str = ApplyEditToString (str, action->edit);
20708     if (StringCmp (str, new_str) != 0
20709         && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, scp, new_str, ExistingTextOption_replace_old, batch_extra)) {
20710       if (also_change_mrna) {
20711         AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20712       }
20713       num_succeed ++;
20714     } else {
20715       num_fail++;
20716     }
20717     new_str = MemFree (new_str);
20718     str = MemFree (str);
20719   }
20720   return num_succeed;
20721 }
20722 
20723 
DoEditActionToObjectList(EditActionPtr action,ValNodePtr object_list,Boolean also_change_mrna)20724 NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list, Boolean also_change_mrna)
20725 {
20726   return DoEditActionToObjectListEx (action, object_list, also_change_mrna, NULL);
20727 }
20728 
20729 
HasMarkers(CharPtr str,TextPortionPtr text_portion)20730 static Boolean HasMarkers (CharPtr str, TextPortionPtr text_portion)
20731 {
20732   Boolean rval = FALSE;
20733   Int4    left_len = 0, right_len = 0;
20734 
20735   if (IsTextMarkerEmpty(text_portion->left_marker)) {
20736     if (IsTextMarkerEmpty(text_portion->right_marker)) {
20737       /* both markers empty, badly formatted command */
20738       rval = FALSE;
20739     } else if (NULL != FindTextMarker(str, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word)) {
20740       rval = TRUE;
20741     }
20742   } else if (NULL == FindTextMarker(str, &left_len, text_portion->left_marker, text_portion->case_sensitive, text_portion->whole_word)) {
20743     rval = FALSE;
20744   } else if (IsTextMarkerEmpty(text_portion->right_marker)
20745              || NULL != FindTextMarker(str + left_len, &right_len, text_portion->right_marker, text_portion->case_sensitive, text_portion->whole_word)) {
20746     rval = TRUE;
20747   }
20748 
20749   return rval;
20750 }
20751 
20752 
DoRemoveOutsideToObjectList(RemoveOutsideActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,BatchExtraPtr batch_extra)20753 NLM_EXTERN Int4 DoRemoveOutsideToObjectList (RemoveOutsideActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, BatchExtraPtr batch_extra)
20754 {
20755   ValNodePtr vnp;
20756   Int4       num_succeed = 0, num_fail = 0;
20757   CharPtr    str, new_str;
20758 
20759   if (action == NULL || object_list == NULL) return 0;
20760 
20761   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20762     str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, NULL, batch_extra);
20763     if (!action->remove_if_not_found && !HasMarkers (str, action->portion)) {
20764       /* do nothing */
20765     } else {
20766       new_str = GetTextPortionFromString (str, action->portion);
20767       if (StringCmp (str, new_str) != 0
20768           && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, action->field, NULL, new_str, ExistingTextOption_replace_old, batch_extra)) {
20769         if (also_change_mrna) {
20770           AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20771         }
20772         num_succeed ++;
20773       } else {
20774         num_fail++;
20775       }
20776       new_str = MemFree (new_str);
20777     }
20778     str = MemFree (str);
20779   }
20780   return num_succeed;
20781 }
20782 
20783 
20784 static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft);
20785 
20786 
NoFieldChange(CharPtr new_val,ValNodePtr vnp,FieldTypePtr field_from,StringConstraintPtr scp,BatchExtraPtr batch_extra)20787 static Boolean NoFieldChange (CharPtr new_val, ValNodePtr vnp, FieldTypePtr field_from, StringConstraintPtr scp, BatchExtraPtr batch_extra)
20788 {
20789   Boolean rval = FALSE;
20790   CharPtr orig = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
20791   if (StringCmp (orig, new_val) == 0) {
20792     rval = TRUE;
20793   }
20794   orig = MemFree (orig);
20795   return rval;
20796 }
20797 
20798 
AddValuesToList(ValNodePtr apply,ValNodePtr PNTR current,Uint2 existing_text)20799 static Boolean AddValuesToList(ValNodePtr apply, ValNodePtr PNTR current, Uint2 existing_text)
20800 {
20801   ValNodePtr vnp_a, vnp_c;
20802   Boolean    rval = FALSE;
20803   CharPtr    str;
20804 
20805   if (apply == NULL) {
20806     return FALSE;
20807   } else if (existing_text == ExistingTextOption_leave_old && current != NULL && *current != NULL) {
20808     return FALSE;
20809   } else if (existing_text == ExistingTextOption_add_qual) {
20810     for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) {
20811       ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue)));
20812     }
20813     rval = TRUE;
20814   } else if (existing_text == ExistingTextOption_replace_old) {
20815     *current = ValNodeFreeData (*current);
20816     for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) {
20817       ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue)));
20818     }
20819     rval = TRUE;
20820   } else {
20821     for (vnp_a = apply; vnp_a != NULL; vnp_a = vnp_a->next) {
20822       if (*current == NULL) {
20823         ValNodeAddPointer (current, vnp_a->choice, StringSave ((CharPtr)(vnp_a->data.ptrvalue)));
20824         rval = TRUE;
20825       } else {
20826         for (vnp_c = *current; vnp_c != NULL; vnp_c = vnp_c->next) {
20827           str = (CharPtr)(vnp_c->data.ptrvalue);
20828           rval |= SetStringValue(&str, (CharPtr)(vnp_a->data.ptrvalue), existing_text);
20829           vnp_c->data.ptrvalue = str;
20830         }
20831       }
20832     }
20833   }
20834   return rval;
20835 }
20836 
20837 
DoConvertActionToObjectListEx(ConvertActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp,BatchExtraPtr batch_extra)20838 NLM_EXTERN Int4 DoConvertActionToObjectListEx (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
20839 {
20840   ValNodePtr vnp;
20841   Int4       num_succeed = 0;
20842   CharPtr    str, from_val, field_name = NULL;
20843   FieldTypePtr field_from, field_to;
20844   Boolean          already_added, field_change;
20845   ValNodePtr       val_list_from, val_list_to, val_vnp;
20846 
20847   if (action == NULL || object_list == NULL || action->fields == NULL) return 0;
20848 
20849   field_from = GetFromFieldFromFieldPair (action->fields);
20850   field_to = GetToFieldFromFieldPair (action->fields);
20851 
20852   if (action->strip_name) {
20853     field_name = SummarizeFieldType (field_to);
20854   }
20855 
20856   if (action->fields->choice == FieldPairType_molinfo_field) {
20857     for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20858       str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, NULL, batch_extra);
20859       from_val = GetSequenceQualValName (field_from->data.ptrvalue);
20860       if (StringCmp (str, from_val) == 0
20861           && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, ExistingTextOption_replace_old, batch_extra)) {
20862         num_succeed ++;
20863       }
20864       str = MemFree (str);
20865     }
20866   } else {
20867     for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20868       /* there may be multiple qualifiers */
20869       val_list_from = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
20870       val_list_to = GetMultipleFieldValuesForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra);
20871       for (val_vnp = val_list_from; val_vnp != NULL; val_vnp = val_vnp->next) {
20872         str = (CharPtr)(val_vnp->data.ptrvalue);
20873         if (action->strip_name) {
20874           RemoveFieldNameFromString (field_name, str);
20875         }
20876         FixCapitalizationInString(&str, action->capitalization, NULL);
20877         val_vnp->data.ptrvalue = str;
20878       }
20879       field_change = AddValuesToList(val_list_from, &val_list_to, action->existing_text);
20880       if (field_change) {
20881         if (!action->keep_original) {
20882           RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_from, scp);
20883         }
20884         RemoveFieldValueForObject(vnp->choice, vnp->data.ptrvalue, field_to, NULL);
20885         for (val_vnp = val_list_to; val_vnp != NULL; val_vnp = val_vnp->next) {
20886           SetFieldValueForObjectEx(vnp->choice, vnp->data.ptrvalue, field_to,
20887                                    NULL, (CharPtr) (val_vnp->data.ptrvalue),
20888                                    ExistingTextOption_add_qual, batch_extra);
20889         }
20890       }
20891       if (also_change_mrna) {
20892         field_change |= AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20893       }
20894       if (field_change) {
20895         num_succeed++;
20896       }
20897       val_list_from = ValNodeFreeData(val_list_from);
20898       val_list_to = ValNodeFreeData(val_list_to);
20899     }
20900   }
20901 
20902   field_from = FieldTypeFree (field_from);
20903   field_to = FieldTypeFree (field_to);
20904   field_name = MemFree (field_name);
20905 
20906   return num_succeed;
20907 }
20908 
20909 
DoConvertActionToObjectList(ConvertActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)20910 NLM_EXTERN Int4 DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
20911 {
20912   return DoConvertActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL);
20913 }
20914 
20915 
DoCopyActionToObjectListEx(CopyActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp,BatchExtraPtr batch_extra)20916 NLM_EXTERN Int4 DoCopyActionToObjectListEx (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
20917 {
20918   ValNodePtr vnp;
20919   Int4       num_succeed = 0, num_fail = 0;
20920   CharPtr    str;
20921   FieldTypePtr field_from, field_to;
20922 
20923   if (action == NULL || object_list == NULL) return 0;
20924   field_from = GetFromFieldFromFieldPair (action->fields);
20925   field_to = GetToFieldFromFieldPair (action->fields);
20926 
20927   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20928     str = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
20929     if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text, batch_extra)) {
20930       if (also_change_mrna) {
20931         AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20932       }
20933       num_succeed ++;
20934     } else {
20935       num_fail++;
20936     }
20937     str = MemFree (str);
20938   }
20939 
20940   field_from = FieldTypeFree (field_from);
20941   field_to = FieldTypeFree (field_to);
20942   return num_succeed;
20943 }
20944 
20945 
DoCopyActionToObjectList(CopyActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)20946 NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
20947 {
20948   return DoCopyActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL);
20949 }
20950 
DoSwapActionToObjectListEx(SwapActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp,BatchExtraPtr batch_extra)20951 NLM_EXTERN Int4 DoSwapActionToObjectListEx (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
20952 {
20953   ValNodePtr vnp;
20954   Int4       num_succeed = 0, num_fail = 0;
20955   CharPtr    str1, str2;
20956   FieldTypePtr field_from, field_to;
20957 
20958   if (action == NULL || object_list == NULL) return 0;
20959   field_from = GetFromFieldFromFieldPair (action->fields);
20960   field_to = GetToFieldFromFieldPair (action->fields);
20961 
20962   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20963     str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
20964     str2 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, batch_extra);
20965     if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str1, ExistingTextOption_replace_old, batch_extra)
20966         && SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str2, ExistingTextOption_replace_old, batch_extra)) {
20967       if (also_change_mrna) {
20968         AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
20969       }
20970       num_succeed ++;
20971     } else {
20972       num_fail++;
20973     }
20974     str1 = MemFree (str1);
20975     str2 = MemFree (str2);
20976   }
20977   field_from = FieldTypeFree (field_from);
20978   field_to = FieldTypeFree (field_to);
20979   return num_succeed;
20980 }
20981 
20982 
DoSwapActionToObjectList(SwapActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)20983 NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
20984 {
20985   return DoSwapActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL);
20986 }
20987 
20988 
DoRemoveActionToObjectList(RemoveActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)20989 NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
20990 {
20991   ValNodePtr vnp;
20992   Int4       num_succeed = 0, num_fail = 0;
20993 
20994   if (action == NULL || object_list == NULL) return 0;
20995 
20996   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
20997     if (RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp)) {
20998       if (also_change_mrna) {
20999         AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
21000       }
21001       num_succeed ++;
21002     } else {
21003       num_fail++;
21004     }
21005   }
21006   return num_succeed;
21007 }
21008 
21009 
DoParseActionToObjectListEx(AECRParseActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp,BatchExtraPtr batch_extra)21010 NLM_EXTERN Int4 DoParseActionToObjectListEx (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp, BatchExtraPtr batch_extra)
21011 {
21012   ValNodePtr vnp;
21013   CharPtr    str1, str2, str3, cp, tmp;
21014   Int4       len, num_succeed = 0, diff, left_len, right_len;
21015   FieldTypePtr field_from, field_to;
21016 
21017   if (action == NULL || object_list == NULL) return 0;
21018   field_from = GetFromFieldFromFieldPair (action->fields);
21019   field_to = GetToFieldFromFieldPair (action->fields);
21020 
21021   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
21022     str1 = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, batch_extra);
21023     str2 = GetTextPortionFromString (str1, action->portion);
21024     str3 = StringSave (str2);
21025     ApplyTextTransformsToString (&str3, action->transform);
21026     if (str3 != NULL) {
21027       if (action->remove_from_parsed) {
21028         cp = FindTextPortionLocationInString (str1, action->portion);
21029         if (cp != NULL) {
21030           len = StringLen (str2);
21031           tmp = cp;
21032           if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->left_marker)
21033               && (tmp = FindTextMarker (str1, &left_len, action->portion->left_marker,
21034                                        action->portion->case_sensitive, action->portion->whole_word)) != NULL) {
21035             if (action->portion->include_left) {
21036               /* adjust */
21037             } else if (!action->portion->include_left) {
21038               /* adjust */
21039               if (action->remove_left) {
21040                 len += left_len;
21041               } else {
21042                 cp += left_len;
21043               }
21044             }
21045           }
21046           if (action->portion != NULL && !IsTextMarkerEmpty (action->portion->right_marker)
21047               && action->remove_right
21048               && !action->portion->include_right
21049               && action->portion != NULL
21050               && (tmp = FindTextMarker (tmp, &right_len, action->portion->right_marker,
21051                                         action->portion->case_sensitive, action->portion->whole_word)) != NULL) {
21052             diff = right_len;
21053             len += diff;
21054           }
21055           StringCpy (cp, cp + len);
21056           SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old, batch_extra);
21057         }
21058       }
21059       if (SetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str3, action->existing_text, batch_extra)) {
21060         if (also_change_mrna) {
21061           AlsoChangeMrnaForObject (vnp->choice, vnp->data.ptrvalue);
21062         }
21063         num_succeed++;
21064       }
21065     }
21066     str1 = MemFree (str1);
21067     str2 = MemFree (str2);
21068     str3 = MemFree (str3);
21069   }
21070   field_from = FieldTypeFree (field_from);
21071   field_to = FieldTypeFree (field_to);
21072   return num_succeed;
21073 }
21074 
21075 
DoParseActionToObjectList(AECRParseActionPtr action,ValNodePtr object_list,Boolean also_change_mrna,StringConstraintPtr scp)21076 NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr object_list, Boolean also_change_mrna, StringConstraintPtr scp)
21077 {
21078   return DoParseActionToObjectListEx (action, object_list, also_change_mrna, scp, NULL);
21079 }
21080 
21081 
ApplyAECRActionToSeqEntry(AECRActionPtr act,SeqEntryPtr sep,BoolPtr created_protein_features)21082 static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep, BoolPtr created_protein_features)
21083 {
21084   StringConstraintPtr scp;
21085   ApplyActionPtr      a;
21086   ConvertActionPtr    c;
21087   RemoveActionPtr     r;
21088   EditActionPtr       e;
21089   RemoveOutsideActionPtr ro;
21090   ValNodePtr          object_list = NULL;
21091   Uint1               field_type;
21092   Uint2               entityID;
21093   Int4                num_succeed = 0;
21094   FieldTypePtr        field_from;
21095   BatchExtraPtr       batch_extra;
21096   AECRActionPtr       act_cpy = NULL;
21097   FeatureFieldPtr     field_cpy;
21098 
21099   if (act == NULL || act->action == NULL) return 0;
21100 
21101   field_type = FieldTypeFromAECRAction (act);
21102   if (field_type == FieldType_cds_gene_prot) {
21103     if (act->action->choice == ActionChoice_edit) {
21104       act_cpy = AsnIoMemCopy (act, (AsnReadFunc) AECRActionAsnRead, (AsnWriteFunc) AECRActionAsnWrite);
21105       e = (EditActionPtr)act_cpy->action->data.ptrvalue;
21106       field_cpy = FeatureFieldFromCDSGeneProtField (e->field->data.intvalue);
21107       e->field->choice = FieldType_feature_field;
21108       e->field->data.ptrvalue = field_cpy;
21109       act = act_cpy;
21110       field_type = FieldTypeFromAECRAction (act);
21111     }
21112   }
21113 
21114   batch_extra = BatchExtraNew ();
21115   InitBatchExtraForAECRAction (batch_extra, act, sep);
21116 
21117   if (field_type == FieldType_cds_gene_prot) {
21118     entityID = ObjMgrGetEntityIDForChoice(sep);
21119     object_list = BuildCGPSetList (entityID, act, created_protein_features);
21120 
21121   } else {
21122     object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
21123   }
21124 
21125   if (object_list == NULL) {
21126       return 0;
21127   }
21128 
21129   switch (act->action->choice) {
21130     case ActionChoice_apply:
21131       a = (ApplyActionPtr) act->action->data.ptrvalue;
21132       scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint);
21133       num_succeed = DoApplyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra);
21134       if (a->field->choice == FieldType_misc || a->field->choice == FieldType_dblink) {
21135         DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
21136       }
21137       break;
21138     case ActionChoice_edit:
21139       e = (EditActionPtr) act->action->data.ptrvalue;
21140       num_succeed = DoEditActionToObjectListEx (e, object_list, act->also_change_mrna, batch_extra);
21141       if (e->field->choice == FieldType_misc) {
21142         DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
21143       }
21144       break;
21145     case ActionChoice_remove_outside:
21146       ro = (RemoveOutsideActionPtr) act->action->data.ptrvalue;
21147       num_succeed = DoRemoveOutsideToObjectList (ro, object_list, act->also_change_mrna, batch_extra);
21148       if (ro->field->choice == FieldType_misc) {
21149         DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
21150       }
21151       break;
21152     case ActionChoice_convert:
21153       scp = NULL;
21154       if (act->constraint != NULL) {
21155         c = (ConvertActionPtr) act->action->data.ptrvalue;
21156         field_from = GetFromFieldFromFieldPair (c->fields);
21157         scp = FindStringConstraintInConstraintSetForField (field_from, act->constraint);
21158         field_from = FieldTypeFree (field_from);
21159       }
21160       num_succeed = DoConvertActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp, batch_extra);
21161       break;
21162     case ActionChoice_swap:
21163       num_succeed = DoSwapActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra);
21164       break;
21165     case ActionChoice_copy:
21166       num_succeed = DoCopyActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra);
21167       break;
21168     case ActionChoice_remove:
21169       r = (RemoveActionPtr) act->action->data.ptrvalue;
21170       scp = FindStringConstraintInConstraintSetForField (r->field, act->constraint);
21171       num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, act->also_change_mrna, scp);
21172       if (r->field->choice == FieldType_misc) {
21173         DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
21174       } else if (r->field->choice == FieldType_dblink) {
21175         DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
21176       }
21177       break;
21178     case ActionChoice_parse:
21179       num_succeed = DoParseActionToObjectListEx (act->action->data.ptrvalue, object_list, act->also_change_mrna, NULL, batch_extra);
21180       break;
21181   }
21182   object_list = FreeObjectList (object_list);
21183   batch_extra = BatchExtraFree (batch_extra);
21184   act_cpy = AECRActionFree (act_cpy);
21185   return num_succeed;
21186 }
21187 
21188 
AECRSampleNew(void)21189 static AECRSamplePtr AECRSampleNew (void)
21190 {
21191   AECRSamplePtr sample;
21192 
21193   sample = (AECRSamplePtr) MemNew (sizeof (AECRSampleData));
21194   MemSet (sample, 0, sizeof (AECRSampleData));
21195   sample->all_same = TRUE;
21196   return sample;
21197 }
21198 
21199 
AECRSampleFree(AECRSamplePtr sample)21200 NLM_EXTERN AECRSamplePtr AECRSampleFree (AECRSamplePtr sample)
21201 {
21202   if (sample != NULL) {
21203     sample->field = FieldTypeFree (sample->field);
21204     sample->first_value = MemFree (sample->first_value);
21205     sample = MemFree (sample);
21206   }
21207   return sample;
21208 }
21209 
21210 
AECRSampleListFree(ValNodePtr list)21211 NLM_EXTERN ValNodePtr AECRSampleListFree (ValNodePtr list)
21212 {
21213   ValNodePtr list_next;
21214 
21215   while (list != NULL) {
21216     list_next = list->next;
21217     list->next = NULL;
21218     list->data.ptrvalue = AECRSampleFree (list->data.ptrvalue);
21219     list = ValNodeFree (list);
21220     list = list_next;
21221   }
21222   return list;
21223 }
21224 
21225 
/*
 * AddTextToAECRSample
 *
 * Records one field value in sample.  Takes ownership of txt: it is either
 * stored as sample->first_value or freed before returning.
 *
 * Blank text is discarded without being counted.  Otherwise num_found is
 * incremented; the first value is kept, and any later value that differs
 * from it clears all_same.  When sample is NULL the text is simply freed.
 */
static void AddTextToAECRSample (AECRSamplePtr sample, CharPtr txt)
{
  if (StringHasNoText (txt) || sample == NULL) {
    /* nothing to record, but the caller handed over ownership of txt */
    txt = MemFree (txt);
    return;
  }

  sample->num_found ++;
  if (sample->first_value == NULL) {
    sample->first_value = txt;
    return;
  }

  if (sample->all_same && StringCmp (sample->first_value, txt) != 0) {
    sample->all_same = FALSE;
  }
  txt = MemFree (txt);
}
21242 
21243 
GetAECRSampleFromObjectListEx(ValNodePtr object_list,FieldTypePtr field,BatchExtraPtr batch_extra)21244 NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectListEx (ValNodePtr object_list, FieldTypePtr field, BatchExtraPtr batch_extra)
21245 {
21246   AECRSamplePtr sample;
21247   ValNodePtr    vnp, prot_vnp, bsp_list;
21248   CharPtr       txt;
21249   CGPSetPtr     cgp;
21250   SeqFeatPtr    sfp;
21251   BatchExtraPtr b = NULL;
21252   SeqEntryPtr   sep;
21253 
21254   if (object_list == NULL || field == NULL) return NULL;
21255 
21256   if (batch_extra == NULL) {
21257     b = BatchExtraNew ();
21258     batch_extra = b;
21259     bsp_list = BioseqListForObjectList (object_list);
21260     for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
21261       sep = SeqMgrGetSeqEntryForData (vnp->data.ptrvalue);
21262       InitBatchExtraForField (batch_extra, field, sep);
21263     }
21264     bsp_list = ValNodeFree (bsp_list);
21265   }
21266 
21267   sample = AECRSampleNew ();
21268   sample->field = FieldTypeCopy (field);
21269   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
21270     if (vnp->choice == 0 && IsFieldTypeMatPeptideRelated (field)) {
21271       cgp = (CGPSetPtr) vnp->data.ptrvalue;
21272       if (cgp != NULL) {
21273         for (prot_vnp = cgp->prot_list; prot_vnp != NULL; prot_vnp = prot_vnp->next) {
21274           sfp = (SeqFeatPtr) prot_vnp->data.ptrvalue;
21275           if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa) {
21276             txt = GetFieldValueForObjectEx (OBJ_SEQFEAT, sfp, field, NULL, batch_extra);
21277             AddTextToAECRSample (sample, txt);
21278           }
21279         }
21280       }
21281     } else {
21282       txt = GetFieldValueForObjectEx (vnp->choice, vnp->data.ptrvalue, field, NULL, batch_extra);
21283       AddTextToAECRSample (sample, txt);
21284     }
21285   }
21286 
21287   b = BatchExtraFree (b);
21288   return sample;
21289 }
21290 
21291 
GetAECRSampleFromObjectList(ValNodePtr object_list,FieldTypePtr field)21292 NLM_EXTERN AECRSamplePtr GetAECRSampleFromObjectList (ValNodePtr object_list, FieldTypePtr field)
21293 {
21294   return GetAECRSampleFromObjectListEx (object_list, field, NULL);
21295 }
21296 
21297 
/* Reports which field, or which pair of fields, an AECR action refers to.
 *
 * act        - action to inspect; may be NULL
 * pField     - if non-NULL, receives the single field for apply, edit,
 *              remove and remove-outside actions, and NULL otherwise
 * pFieldPair - if non-NULL, receives the field pair for convert, swap,
 *              copy and parse actions, and NULL otherwise
 *
 * The pointers handed back are the ones stored in the action itself;
 * nothing is copied here.
 */
static void GetFieldsFromAECR (AECRActionPtr act, FieldTypePtr PNTR pField, ValNodePtr PNTR pFieldPair)
{
  FieldTypePtr single_field = NULL;
  ValNodePtr   field_pair = NULL;
  Pointer      action_data = NULL;

  if (act != NULL && act->action != NULL) {
    action_data = act->action->data.ptrvalue;
  }

  if (action_data != NULL) {
    switch (act->action->choice) {
      /* actions that work on a single field */
      case ActionChoice_apply:
        single_field = ((ApplyActionPtr) action_data)->field;
        break;
      case ActionChoice_edit:
        single_field = ((EditActionPtr) action_data)->field;
        break;
      case ActionChoice_remove_outside:
        single_field = ((RemoveOutsideActionPtr) action_data)->field;
        break;
      case ActionChoice_remove:
        single_field = ((RemoveActionPtr) action_data)->field;
        break;
      /* actions that work on a pair of fields */
      case ActionChoice_convert:
        field_pair = ((ConvertActionPtr) action_data)->fields;
        break;
      case ActionChoice_swap:
        field_pair = ((SwapActionPtr) action_data)->fields;
        break;
      case ActionChoice_copy:
        field_pair = ((CopyActionPtr) action_data)->fields;
        break;
      case ActionChoice_parse:
        field_pair = ((AECRParseActionPtr) action_data)->fields;
        break;
      default:
        break;
    }
  }

  if (pField != NULL) {
    *pField = single_field;
  }
  if (pFieldPair != NULL) {
    *pFieldPair = field_pair;
  }
}
21370 
21371 
FieldTypeListFree(ValNodePtr list)21372 NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListFree (ValNodePtr list)
21373 {
21374   ValNodePtr list_next;
21375 
21376   while (list != NULL) {
21377     list_next = list->next;
21378     list->next = NULL;
21379     list = FieldTypeFree (list);
21380     list = list_next;
21381   }
21382   return list;
21383 }
21384 
21385 
FieldTypeListCopy(ValNodePtr orig)21386 NLM_EXTERN ValNodePtr LIBCALLBACK FieldTypeListCopy (ValNodePtr orig)
21387 {
21388   ValNodePtr prev = NULL, new_list = NULL, vnp;
21389 
21390   while (orig != NULL) {
21391     vnp = FieldTypeCopy (orig);
21392     if (prev == NULL) {
21393       new_list = vnp;
21394     } else {
21395       prev->next = vnp;
21396     }
21397     prev = vnp;
21398     orig = orig->next;
21399   }
21400   return new_list;
21401 }
21402 
21403 
SortVnpByChoiceAndIntvalue(VoidPtr ptr1,VoidPtr ptr2)21404 static int LIBCALLBACK SortVnpByChoiceAndIntvalue (VoidPtr ptr1, VoidPtr ptr2)
21405 
21406 {
21407   ValNodePtr  vnp1;
21408   ValNodePtr  vnp2;
21409   int         rval = 0;
21410 
21411   if (ptr1 != NULL && ptr2 != NULL) {
21412     vnp1 = *((ValNodePtr PNTR) ptr1);
21413     vnp2 = *((ValNodePtr PNTR) ptr2);
21414     if (vnp1 == NULL && vnp2 == NULL) {
21415       rval = 0;
21416     } else if (vnp1 == NULL) {
21417       rval = -1;
21418     } else if (vnp2 == NULL) {
21419       rval = 1;
21420     } else if (vnp1->choice > vnp2->choice) {
21421       rval = 1;
21422     } else if (vnp1->choice < vnp2->choice) {
21423       rval = -1;
21424     } else if (vnp1->data.intvalue > vnp2->data.intvalue) {
21425       rval = 1;
21426     } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
21427       rval = -1;
21428     } else {
21429       rval = 0;
21430     }
21431   }
21432   return rval;
21433 }
21434 
21435 
21436 /* Callback function used for sorting and uniqueing */
21437 
SortVnpByFieldType(VoidPtr ptr1,VoidPtr ptr2)21438 NLM_EXTERN int LIBCALLBACK SortVnpByFieldType (VoidPtr ptr1, VoidPtr ptr2)
21439 
21440 {
21441   ValNodePtr  vnp1;
21442   ValNodePtr  vnp2;
21443   int         rval = 0;
21444 
21445   if (ptr1 != NULL && ptr2 != NULL) {
21446     vnp1 = *((ValNodePtr PNTR) ptr1);
21447     vnp2 = *((ValNodePtr PNTR) ptr2);
21448     rval = CompareFieldTypes (vnp1, vnp2);
21449   }
21450 
21451   return rval;
21452 }
21453 
21454 
SortVnpByFieldTypeAndSourceQualifier(VoidPtr ptr1,VoidPtr ptr2)21455 NLM_EXTERN int LIBCALLBACK SortVnpByFieldTypeAndSourceQualifier (VoidPtr ptr1, VoidPtr ptr2)
21456 
21457 {
21458   ValNodePtr  vnp1;
21459   ValNodePtr  vnp2;
21460   int         rval = 0;
21461 
21462   if (ptr1 != NULL && ptr2 != NULL) {
21463     vnp1 = *((ValNodePtr PNTR) ptr1);
21464     vnp2 = *((ValNodePtr PNTR) ptr2);
21465     rval = CompareFieldTypesEx (vnp1, vnp2, TRUE);
21466   }
21467 
21468   return rval;
21469 }
21470 
21471 
/* BioSource visitor callback.  userdata points at a ValNodePtr holding the
 * fields gathered so far; the fields found on biop are added to it.
 */
static void GetBioSourceFields (BioSourcePtr biop, Pointer userdata)
{
  ValNodePtr PNTR gathered;
  ValNodePtr      biop_fields;

  if (biop == NULL || userdata == NULL) {
    return;
  }
  gathered = (ValNodePtr PNTR) userdata;

  /* The previously gathered list is appended to the new (shorter) list
     rather than the other way round, because ValNodeLink walks its first
     argument to find the end.  This does not keep the fields in visiting
     order, which is acceptable because every user of GetBioSourceFields
     sorts the result afterwards. */
  biop_fields = GetSourceQualFieldListFromBioSource (biop);
  *gathered = ValNodeLink (&biop_fields, *gathered);
}
21490 
21491 
SortUniqueFieldTypeList(ValNodePtr PNTR field_list)21492 NLM_EXTERN void SortUniqueFieldTypeList (ValNodePtr PNTR field_list)
21493 {
21494   if (field_list == NULL) return;
21495   *field_list = ValNodeSort (*field_list, SortVnpByFieldType);
21496   ValNodeUnique (field_list, SortVnpByFieldType, FieldTypeListFree);
21497 }
21498 
21499 
GetSourceQualSampleFieldList(SeqEntryPtr sep)21500 NLM_EXTERN ValNodePtr GetSourceQualSampleFieldList (SeqEntryPtr sep)
21501 {
21502   ValNodePtr field_list = NULL;
21503 
21504   VisitBioSourcesInSep (sep, &field_list, GetBioSourceFields);
21505   field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
21506   ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
21507 
21508   return field_list;
21509 }
21510 
21511 
GetSourceQualSampleFieldListForSeqEntryList(ValNodePtr list)21512 NLM_EXTERN ValNodePtr GetSourceQualSampleFieldListForSeqEntryList (ValNodePtr list)
21513 {
21514   ValNodePtr field_list = NULL;
21515   ValNodePtr vnp;
21516 
21517   if (list == NULL) {
21518     return NULL;
21519   }
21520 
21521   for (vnp = list; vnp != NULL; vnp = vnp->next) {
21522     VisitBioSourcesInSep (vnp->data.ptrvalue, &field_list, GetBioSourceFields);
21523   }
21524   field_list = ValNodeSort (field_list, SortVnpByFieldTypeAndSourceQualifier);
21525   ValNodeUnique (&field_list, SortVnpByFieldTypeAndSourceQualifier, FieldTypeListFree);
21526 
21527   return field_list;
21528 }
21529 
21530 
/* Feature visitor callback.  data points at a ValNodePtr list; the fields
 * returned by GetFieldListFromFeature for sfp are appended to it.
 */
static void GetFeatureQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer data)
{
  ValNodePtr PNTR gathered = (ValNodePtr PNTR) data;

  if (gathered != NULL && sfp != NULL) {
    ValNodeLink (gathered, GetFieldListFromFeature (sfp));
  }
}
21540 
GetFeatureQualFieldList(SeqEntryPtr sep)21541 static ValNodePtr GetFeatureQualFieldList (SeqEntryPtr sep)
21542 {
21543   ValNodePtr field_list = NULL;
21544 
21545   VisitFeaturesInSep (sep, &field_list, GetFeatureQualFieldListForAECRSampleCallback);
21546   field_list = ValNodeSort (field_list, SortVnpByFieldType);
21547   ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree);
21548   return field_list;
21549 }
21550 
21551 
GetRnaQualFieldListForAECRSampleCallback(SeqFeatPtr sfp,Pointer userdata)21552 static void GetRnaQualFieldListForAECRSampleCallback (SeqFeatPtr sfp, Pointer userdata)
21553 {
21554   RnaFeatTypePtr type;
21555   RnaRefPtr      rrp;
21556   RnaQualPtr     rq;
21557   RNAGenPtr      rgp;
21558   GeneRefPtr     grp = NULL;
21559   SeqFeatPtr     gene = NULL;
21560   SeqMgrFeatContext fcontext;
21561 
21562   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL || userdata == NULL) {
21563     return;
21564   }
21565 
21566   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
21567 
21568   type = RnaFeatTypeFromSeqFeat (sfp);
21569 
21570   if (type == NULL) return;
21571 
21572   /* add product if appropriate */
21573   if ((type->choice == RnaFeatType_preRNA || type->choice == RnaFeatType_mRNA
21574         || type->choice == RnaFeatType_rRNA || type->choice == RnaFeatType_miscRNA
21575         || type->choice == RnaFeatType_any)
21576        && rrp->ext.choice == 1
21577        && !StringHasNoText (rrp->ext.value.ptrvalue)) {
21578     rq = RnaQualNew ();
21579     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21580     rq->field = Rna_field_product;
21581     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21582   } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL
21583     && !StringHasNoText (rgp->product)) {
21584     rq = RnaQualNew ();
21585     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21586     rq->field = Rna_field_product;
21587     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21588   }
21589 
21590   /* add comment if present */
21591   if (!StringHasNoText (sfp->comment)) {
21592     rq = RnaQualNew ();
21593     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21594     rq->field = Rna_field_comment;
21595     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21596   }
21597 
21598   /* add tRNA specific if appropriate */
21599   if (type->choice == RnaFeatType_tRNA || (type->choice == RnaFeatType_any && rrp->type == 2)) {
21600     /* codons recognized */
21601     rq = RnaQualNew ();
21602     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21603     rq->field = Rna_field_codons_recognized;
21604     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21605 
21606     /* anticodon */
21607     rq = RnaQualNew ();
21608     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21609     rq->field = Rna_field_anticodon;
21610     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21611   }
21612 
21613   /* add ncRNA class if appropriate and present */
21614   if ((type->choice == RnaFeatType_ncRNA || type->choice == RnaFeatType_any)
21615       && rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL
21616       && !StringHasNoText (rgp->_class)) {
21617     rq = RnaQualNew ();
21618     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21619     rq->field = Rna_field_ncrna_class;
21620     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21621   }
21622 
21623   /* add transcript ID if present */
21624   if (sfp->product != NULL) {
21625     rq = RnaQualNew ();
21626     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21627     rq->field = Rna_field_transcript_id;
21628     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21629   }
21630 
21631   /* add gene fields */
21632   grp = SeqMgrGetGeneXref (sfp);
21633   if (grp == NULL) {
21634     gene = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
21635     if (gene != NULL) {
21636       grp = gene->data.value.ptrvalue;
21637     }
21638   }
21639   if (grp != NULL && !SeqMgrGeneIsSuppressed (grp)) {
21640     /* gene locus */
21641     if (!StringHasNoText (grp->locus)) {
21642       rq = RnaQualNew ();
21643       rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21644       rq->field = Rna_field_gene_locus;
21645       ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21646     }
21647     /* gene description */
21648     if (!StringHasNoText (grp->desc)) {
21649       rq = RnaQualNew ();
21650       rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21651       rq->field = Rna_field_gene_locus;
21652       ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21653     }
21654     /* maploc */
21655     if (!StringHasNoText (grp->maploc)) {
21656       rq = RnaQualNew ();
21657       rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21658       rq->field = Rna_field_gene_maploc;
21659       ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21660     }
21661     /* locus tag */
21662     if (!StringHasNoText (grp->locus_tag)) {
21663       rq = RnaQualNew ();
21664       rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21665       rq->field = Rna_field_gene_locus_tag;
21666       ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21667     }
21668     /* synonym */
21669     if (grp->syn != NULL) {
21670       rq = RnaQualNew ();
21671       rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21672       rq->field = Rna_field_gene_synonym;
21673       ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21674     }
21675   }
21676 
21677   /* gene comment */
21678   if (gene != NULL && !StringHasNoText (gene->comment)) {
21679     rq = RnaQualNew ();
21680     rq->type = AsnIoMemCopy (type, (AsnReadFunc) RnaFeatTypeAsnRead, (AsnWriteFunc) RnaFeatTypeAsnWrite);
21681     rq->field = Rna_field_gene_comment;
21682     ValNodeAddPointer ((ValNodePtr PNTR) userdata, FieldType_rna_field, rq);
21683   }
21684 }
21685 
21686 
GetRnaQualFieldList(SeqEntryPtr sep)21687 static ValNodePtr GetRnaQualFieldList (SeqEntryPtr sep)
21688 {
21689   ValNodePtr field_list = NULL;
21690 
21691   VisitFeaturesInSep (sep, &field_list, GetRnaQualFieldListForAECRSampleCallback);
21692   field_list = ValNodeSort (field_list, SortVnpByFieldType);
21693   ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree);
21694   return field_list;
21695 }
21696 
21697 
GetStructuredCommentFieldListFromUserObject(UserObjectPtr uop)21698 NLM_EXTERN ValNodePtr GetStructuredCommentFieldListFromUserObject (UserObjectPtr uop)
21699 {
21700   ValNodePtr list = NULL;
21701   UserFieldPtr  ufp;
21702   ValNodePtr    vnp;
21703 
21704   if (uop != NULL && IsUserObjectStructuredComment (uop)) {
21705     ufp = uop->data;
21706     while (ufp != NULL) {
21707       if (ufp->label != NULL && ufp->label->str != NULL
21708           && StringCmp (ufp->label->str, "StructuredCommentPrefix") != 0
21709           && StringCmp (ufp->label->str, "StructuredCommentSuffix") != 0) {
21710         vnp = ValNodeNew (NULL);
21711         vnp->choice = StructuredCommentField_named;
21712         vnp->data.ptrvalue = StringSave (ufp->label->str);
21713         ValNodeAddPointer (&list, FieldType_struc_comment_field, vnp);
21714       }
21715       ufp = ufp->next;
21716     }
21717   }
21718   return list;
21719 }
21720 
21721 
/* Descriptor visitor callback.  For a user descriptor that is a structured
 * comment, appends its named fields to the ValNodePtr list at data.
 */
static void GetStructuredCommentFieldsCallback (SeqDescrPtr sdp, Pointer data)
{
  UserObjectPtr uop;

  if (sdp == NULL || data == NULL || sdp->choice != Seq_descr_user) {
    return;
  }

  uop = sdp->data.ptrvalue;
  if (uop == NULL || !IsUserObjectStructuredComment (uop)) {
    return;
  }

  ValNodeLink ((ValNodePtr PNTR) data, GetStructuredCommentFieldListFromUserObject(uop));
}
21732 
21733 
GetStructuredCommentFieldList(SeqEntryPtr sep)21734 NLM_EXTERN ValNodePtr GetStructuredCommentFieldList (SeqEntryPtr sep)
21735 {
21736   ValNodePtr field_list = NULL;
21737   ValNodePtr dbname, field_name;
21738 
21739   dbname = ValNodeNew (NULL);
21740   dbname->choice = StructuredCommentField_database;
21741   ValNodeAddPointer (&field_list, FieldType_struc_comment_field, dbname);
21742 
21743   field_name = ValNodeNew (NULL);
21744   field_name->choice = StructuredCommentField_field_name;
21745   ValNodeAddPointer (&field_list, FieldType_struc_comment_field, field_name);
21746 
21747   VisitDescriptorsInSep (sep, &field_list, GetStructuredCommentFieldsCallback);
21748 
21749   field_list = ValNodeSort (field_list, SortVnpByFieldType);
21750   ValNodeUnique (&field_list, SortVnpByFieldType, FieldTypeListFree);
21751   return field_list;
21752 }
21753 
21754 
/* Descriptor visitor callback: adds each source descriptor to the
 * ValNodePtr list at data, tagged OBJ_SEQDESC.
 */
static void CollectBioSourceDescCallback (SeqDescrPtr sdp, Pointer data)
{
  if (sdp == NULL || data == NULL) {
    return;
  }
  if (sdp->choice == Seq_descr_source) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
  }
}
21761 
/* Feature visitor callback: adds each BioSource feature to the ValNodePtr
 * list at data, tagged OBJ_SEQFEAT.  Does nothing when data is NULL, in
 * line with the other collector callbacks in this file.
 */
static void CollectBioSourceFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp != NULL && data != NULL && sfp->data.choice == SEQFEAT_BIOSRC) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
  }
}
21768 
21769 
/* Feature visitor callback: adds every feature except BioSource and
 * publication features to the ValNodePtr list at data, tagged OBJ_SEQFEAT.
 */
static void CollectFeaturesCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp == NULL || data == NULL) {
    return;
  }
  if (sfp->data.choice == SEQFEAT_BIOSRC || sfp->data.choice == SEQFEAT_PUB) {
    return;
  }
  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
}
21776 
21777 
/* Descriptor visitor callback: adds each publication descriptor to the
 * ValNodePtr list at data, tagged OBJ_SEQDESC.
 */
static void CollectPubDescCallback (SeqDescrPtr sdp, Pointer data)
{
  if (sdp == NULL || data == NULL) {
    return;
  }
  if (sdp->choice == Seq_descr_pub) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
  }
}
21784 
/* Feature visitor callback: adds each publication feature to the
 * ValNodePtr list at data, tagged OBJ_SEQFEAT.  Does nothing when data is
 * NULL, in line with the other collector callbacks in this file.
 */
static void CollectPubFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp != NULL && data != NULL && sfp->data.choice == SEQFEAT_PUB) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQFEAT, sfp);
  }
}
21791 
21792 
/* Bioseq visitor callback: adds bsp to the ValNodePtr list at data, tagged
 * OBJ_BIOSEQ.
 */
static void CollectBioseqCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp == NULL || data == NULL) {
    return;
  }
  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
}
21799 
21800 
21801 /*
21802 static void CollectNucBioseqCallback (BioseqPtr bsp, Pointer data)
21803 {
21804   if (bsp != NULL && data != NULL && !ISA_aa (bsp->mol)) {
21805     ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_BIOSEQ, bsp);
21806   }
21807 }
21808 */
21809 
21810 
/* Adds every comment descriptor that SeqMgrGetNextDescriptor reports for
 * bsp to *dest_list, tagged OBJ_SEQDESC.  If there is none, a new comment
 * descriptor holding an empty string is created on bsp, flagged for
 * deletion, and added instead.
 */
static void AddCommentDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext desc_context;
  SeqDescrPtr       comment_desc;
  ObjValNodePtr     placeholder;

  if (bsp == NULL || dest_list == NULL) {
    return;
  }

  comment_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &desc_context);
  if (comment_desc != NULL) {
    /* existing comment descriptors become the destinations */
    while (comment_desc != NULL) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, comment_desc);
      comment_desc = SeqMgrGetNextDescriptor (bsp, comment_desc, Seq_descr_comment, &desc_context);
    }
    return;
  }

  /* No existing comment descriptor: create an empty one marked for delete.
   * It is expected to be unmarked when it gets populated.
   */
  comment_desc = CreateNewDescriptorOnBioseq (bsp, Seq_descr_comment);
  comment_desc->data.ptrvalue = StringSave ("");
  placeholder = (ObjValNodePtr) comment_desc;
  placeholder->idx.deleteme = TRUE;
  ValNodeAddPointer (dest_list, OBJ_SEQDESC, comment_desc);
}
21839 
21840 
CollectCommentDescriptors(SeqEntryPtr sep)21841 static ValNodePtr CollectCommentDescriptors (SeqEntryPtr sep)
21842 {
21843   ValNodePtr seq_list = NULL, vnp, desc_list = NULL;
21844 
21845   if (sep == NULL) {
21846     return NULL;
21847   }
21848 
21849   /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
21850   seq_list = CollectNucBioseqs (sep);
21851 
21852   for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
21853     AddCommentDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
21854   }
21855   seq_list = ValNodeFree (seq_list);
21856   return desc_list;
21857 }
21858 
21859 
/* Descriptor visitor callback: adds each user descriptor that is a
 * structured comment to the ValNodePtr list at data, tagged OBJ_SEQDESC.
 */
static void CollectStructuredCommentsCallback (SeqDescrPtr sdp, Pointer data)
{
  UserObjectPtr uop;

  if (sdp == NULL || data == NULL || sdp->choice != Seq_descr_user) {
    return;
  }

  uop = sdp->data.ptrvalue;
  if (uop == NULL || !IsUserObjectStructuredComment (uop)) {
    return;
  }

  ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
}
21870 
21871 
CollectDBLinkDescriptors(SeqEntryPtr sep)21872 static ValNodePtr CollectDBLinkDescriptors (SeqEntryPtr sep)
21873 {
21874   ValNodePtr seq_list = NULL, vnp, desc_list = NULL;
21875 
21876   if (sep == NULL) {
21877     return NULL;
21878   }
21879 
21880   /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
21881   seq_list = CollectNucBioseqs (sep);
21882 
21883   for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
21884     AddDBLinkDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
21885   }
21886   seq_list = ValNodeFree (seq_list);
21887   return desc_list;
21888 }
21889 
21890 
/* Adds every title descriptor that SeqMgrGetNextDescriptor reports for bsp
 * to *dest_list, tagged OBJ_SEQDESC.  If there is none, a new title
 * descriptor holding an empty string is created on bsp, flagged for
 * deletion, and added instead.
 */
static void AddDeflineDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext desc_context;
  SeqDescrPtr       title_desc;
  ObjValNodePtr     placeholder;

  if (bsp == NULL || dest_list == NULL) {
    return;
  }

  title_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &desc_context);
  if (title_desc != NULL) {
    /* existing title descriptors become the destinations */
    while (title_desc != NULL) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, title_desc);
      title_desc = SeqMgrGetNextDescriptor (bsp, title_desc, Seq_descr_title, &desc_context);
    }
    return;
  }

  /* No existing title descriptor: create an empty one marked for delete.
   * It is expected to be unmarked when it gets populated.
   */
  title_desc = CreateNewDescriptorOnBioseq (bsp, Seq_descr_title);
  title_desc->data.ptrvalue = StringSave ("");
  placeholder = (ObjValNodePtr) title_desc;
  placeholder->idx.deleteme = TRUE;
  ValNodeAddPointer (dest_list, OBJ_SEQDESC, title_desc);
}
21919 
21920 
CollectDeflineDescriptors(SeqEntryPtr sep)21921 static ValNodePtr CollectDeflineDescriptors (SeqEntryPtr sep)
21922 {
21923   ValNodePtr seq_list = NULL, vnp, desc_list = NULL;
21924 
21925   if (sep == NULL) {
21926     return NULL;
21927   }
21928 
21929   /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
21930   seq_list = CollectNucBioseqs (sep);
21931 
21932   for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
21933     AddDeflineDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
21934   }
21935   seq_list = ValNodeFree (seq_list);
21936   return desc_list;
21937 }
21938 
21939 
/* Adds every GenBank block descriptor that SeqMgrGetNextDescriptor reports
 * for bsp to *dest_list, tagged OBJ_SEQDESC.  If there is none, a new
 * GenBank descriptor holding a fresh GBBlock is created on bsp, flagged for
 * deletion, and added instead.
 */
static void AddGenbankBlockDescriptorDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext desc_context;
  SeqDescrPtr       genbank_desc;
  ObjValNodePtr     placeholder;

  if (bsp == NULL || dest_list == NULL) {
    return;
  }

  genbank_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &desc_context);
  if (genbank_desc != NULL) {
    /* existing GenBank block descriptors become the destinations */
    while (genbank_desc != NULL) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, genbank_desc);
      genbank_desc = SeqMgrGetNextDescriptor (bsp, genbank_desc, Seq_descr_genbank, &desc_context);
    }
    return;
  }

  /* No existing GenBank block descriptor: create an empty one marked for
   * delete.  It is expected to be unmarked when it gets populated.
   */
  genbank_desc = CreateNewDescriptorOnBioseq (bsp, Seq_descr_genbank);
  genbank_desc->data.ptrvalue = GBBlockNew ();
  placeholder = (ObjValNodePtr) genbank_desc;
  placeholder->idx.deleteme = TRUE;
  ValNodeAddPointer (dest_list, OBJ_SEQDESC, genbank_desc);
}
21968 
21969 
CollectGenbankBlockDescriptors(SeqEntryPtr sep)21970 static ValNodePtr CollectGenbankBlockDescriptors (SeqEntryPtr sep)
21971 {
21972   ValNodePtr seq_list = NULL, vnp, desc_list = NULL;
21973 
21974   if (sep == NULL) {
21975     return NULL;
21976   }
21977 
21978   /* VisitBioseqsInSep (sep, &seq_list, CollectNucBioseqCallback); */
21979   seq_list = CollectNucBioseqs (sep);
21980 
21981   for (vnp = seq_list; vnp != NULL; vnp = vnp->next) {
21982     AddGenbankBlockDescriptorDestinationsForBioseq (vnp->data.ptrvalue, &desc_list);
21983   }
21984   seq_list = ValNodeFree (seq_list);
21985   return desc_list;
21986 }
21987 
21988 
/* Descriptor visitor callback: adds each user descriptor whose user object
 * type string is "DBLink" to the ValNodePtr list at data, tagged
 * OBJ_SEQDESC.
 */
static void CollectDblinkCallback (SeqDescPtr sdp, Pointer data)
{
  UserObjectPtr uop;

  if (sdp != NULL && data != NULL
      && sdp->choice == Seq_descr_user
      && (uop = (UserObjectPtr)sdp->data.ptrvalue) != NULL
      && uop->type != NULL
      && StringCmp (uop->type->str, "DBLink") == 0) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, OBJ_SEQDESC, sdp);
  }
}
22003 
22004 
GetObjectListForFieldType(Uint1 field_type,SeqEntryPtr sep)22005 NLM_EXTERN ValNodePtr GetObjectListForFieldType (Uint1 field_type, SeqEntryPtr sep)
22006 {
22007   ValNodePtr object_list = NULL;
22008   Uint2      entityID;
22009 
22010   switch (field_type) {
22011     case FieldType_source_qual:
22012       VisitDescriptorsInSep (sep, &object_list, CollectBioSourceDescCallback);
22013       VisitFeaturesInSep (sep, &object_list, CollectBioSourceFeatCallback);
22014       break;
22015     case FieldType_cds_gene_prot:
22016       entityID = ObjMgrGetEntityIDForChoice(sep);
22017       object_list = BuildCGPSetList (entityID, NULL, NULL);
22018       break;
22019     case FieldType_feature_field:
22020       VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback);
22021       break;
22022     case FieldType_molinfo_field:
22023       VisitBioseqsInSep (sep, &object_list, CollectBioseqCallback);
22024       break;
22025     case FieldType_pub:
22026       VisitDescriptorsInSep (sep, &object_list, CollectPubDescCallback);
22027       VisitFeaturesInSep (sep, &object_list, CollectPubFeatCallback);
22028       break;
22029     case FieldType_rna_field:
22030       VisitFeaturesInSep (sep, &object_list, CollectFeaturesCallback);
22031       break;
22032     case FieldType_struc_comment_field:
22033       VisitDescriptorsInSep (sep, &object_list, CollectStructuredCommentsCallback);
22034       break;
22035     case FieldType_misc:
22036       /* VisitBioseqsInSep (sep, &object_list, CollectNucBioseqCallback); */
22037       object_list = CollectNucBioseqs (sep);
22038       ValNodeLink (&object_list, CollectCommentDescriptors (sep));
22039       break;
22040     case FieldType_dblink:
22041       VisitDescriptorsInSep (sep, &object_list, CollectDblinkCallback);
22042       break;
22043   }
22044   return object_list;
22045 }
22046 
22047 
/* Working data handed to SeqCollectorCallback while visiting Bioseqs */
typedef struct seqcollector {
  ValNodePtr object_list;       /* Bioseqs accepted so far, stored as OBJ_BIOSEQ nodes */
  ConstraintChoiceSetPtr csp;   /* constraint each visited Bioseq is tested against */
} SeqCollectorData, PNTR SeqCollectorPtr;
22052 
22053 
/* Bioseq visitor callback.  data is a SeqCollectorPtr; bsp is added to its
 * object_list when DoesObjectMatchConstraintChoiceSet accepts it against
 * the collector's constraint set.
 */
static void SeqCollectorCallback (BioseqPtr bsp, Pointer data)
{
  SeqCollectorPtr collector = (SeqCollectorPtr) data;

  if (collector != NULL
      && DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, collector->csp)) {
    ValNodeAddPointer (&(collector->object_list), OBJ_BIOSEQ, bsp);
  }
}
22066 
22067 
GetSequenceListForConstraint(SeqEntryPtr sep,ConstraintChoiceSetPtr csp)22068 NLM_EXTERN ValNodePtr GetSequenceListForConstraint (SeqEntryPtr sep, ConstraintChoiceSetPtr csp)
22069 {
22070   SeqCollectorData s;
22071 
22072   MemSet (&s, 0, sizeof (SeqCollectorData));
22073   s.csp = csp;
22074   VisitBioseqsInSep (sep, &s, SeqCollectorCallback);
22075   return s.object_list;
22076 }
22077 
22078 
GetFieldListForFieldType(Uint1 field_type,SeqEntryPtr sep)22079 NLM_EXTERN ValNodePtr GetFieldListForFieldType (Uint1 field_type, SeqEntryPtr sep)
22080 {
22081   ValNodePtr fields = NULL;
22082 
22083   /* get a list of the fields that are appropriate for the objects collected */
22084   switch (field_type) {
22085     case FieldType_cds_gene_prot:
22086       fields = MakeCDSGeneProtFieldTypeList ();
22087       break;
22088     case FieldType_source_qual:
22089       fields = GetSourceQualSampleFieldList (sep);
22090       break;
22091     case FieldType_feature_field:
22092       fields = GetFeatureQualFieldList (sep);
22093       break;
22094     case FieldType_molinfo_field:
22095       fields = MakeSequenceQualFieldTypeList ();
22096       break;
22097     case FieldType_pub:
22098       fields = MakePubFieldTypeList ();
22099       break;
22100     case FieldType_rna_field:
22101       fields = GetRnaQualFieldList (sep);
22102       break;
22103     case FieldType_struc_comment_field:
22104       fields = GetStructuredCommentFieldList (sep);
22105       break;
22106     case FieldType_misc:
22107       ValNodeAddInt (&fields, FieldType_misc, Misc_field_genome_project_id);
22108       ValNodeAddInt (&fields, FieldType_misc, Misc_field_comment_descriptor);
22109       ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline);
22110       ValNodeAddInt (&fields, FieldType_misc, Misc_field_keyword);
22111       break;
22112     case FieldType_dblink:
22113       ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_trace_assembly);
22114       ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_sample);
22115       ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_probe_db);
22116       ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_sequence_read_archve);
22117       ValNodeAddInt (&fields, FieldType_dblink, DBLink_field_type_bio_project);
22118       break;
22119   }
22120   return fields;
22121 }
22122 
22123 
GetAECRSampleListForSeqEntry(Uint1 field_type,SeqEntryPtr sep)22124 NLM_EXTERN ValNodePtr GetAECRSampleListForSeqEntry (Uint1 field_type, SeqEntryPtr sep)
22125 {
22126   ValNodePtr          object_list;
22127   ValNodePtr          fields = NULL, vnp;
22128   ValNodePtr          list = NULL;
22129   AECRSamplePtr       sample;
22130   BatchExtraPtr       batch_extra;
22131 
22132   object_list = GetObjectListForFieldType (field_type, sep);
22133 
22134   /* get a list of the fields that are appropriate for the objects collected */
22135   fields = GetFieldListForFieldType (field_type, sep);
22136 
22137   batch_extra = BatchExtraNew ();
22138   for (vnp = fields; vnp != NULL; vnp = vnp->next) {
22139     InitBatchExtraForField (batch_extra, vnp, sep);
22140   }
22141   for (vnp = fields; vnp != NULL; vnp = vnp->next) {
22142     sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra);
22143     if (sample != NULL && sample->num_found > 0) {
22144       ValNodeAddPointer (&list, 0, sample);
22145     } else {
22146       sample = AECRSampleFree (sample);
22147     }
22148   }
22149 
22150   batch_extra = BatchExtraFree (batch_extra);
22151   fields = FieldTypeListFree (fields);
22152 
22153   object_list = FreeObjectList (object_list);
22154   return list;
22155 }
22156 
22157 
GetAECRSampleList(AECRActionPtr act,SeqEntryPtr sep)22158 NLM_EXTERN ValNodePtr GetAECRSampleList (AECRActionPtr act, SeqEntryPtr sep)
22159 {
22160   Uint1               field_type;
22161   Uint2               entityID;
22162   ValNodePtr          object_list;
22163   ValNodePtr          fields = NULL, vnp;
22164   ValNodePtr          list = NULL;
22165   AECRSamplePtr       sample;
22166   BatchExtraPtr       batch_extra;
22167 
22168   batch_extra = BatchExtraNew ();
22169   InitBatchExtraForAECRAction (batch_extra, act, sep);
22170 
22171   field_type = FieldTypeFromAECRAction (act);
22172   if (field_type == FieldType_cds_gene_prot) {
22173     entityID = ObjMgrGetEntityIDForChoice(sep);
22174     object_list = BuildCGPSetList (entityID, act, NULL);
22175   } else {
22176     object_list = GetObjectListForAECRActionEx (sep, act, batch_extra);
22177   }
22178 
22179   /* get fields used in action */
22180   fields = GetFieldTypeListFromAECRAction (act);
22181 
22182   for (vnp = fields; vnp != NULL; vnp = vnp->next) {
22183     sample = GetAECRSampleFromObjectListEx (object_list, vnp, batch_extra);
22184     if (sample != NULL && sample->num_found > 0) {
22185       ValNodeAddPointer (&list, 0, sample);
22186     } else {
22187       sample = AECRSampleFree (sample);
22188     }
22189   }
22190 
22191   fields = FieldTypeListFree (fields);
22192 
22193   batch_extra = BatchExtraFree (batch_extra);
22194   DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
22195 
22196   FreeObjectList (object_list);
22197   return list;
22198 }
22199 
22200 
GetFieldSampleFromList(ValNodePtr list,FieldTypePtr field)22201 NLM_EXTERN AECRSamplePtr GetFieldSampleFromList (ValNodePtr list, FieldTypePtr field)
22202 {
22203   AECRSamplePtr sample = NULL;
22204 
22205   while (list != NULL && sample == NULL) {
22206     sample = list->data.ptrvalue;
22207     if (sample != NULL && !DoFieldTypesMatch (sample->field, field)) {
22208       sample = NULL;
22209     }
22210     list = list->next;
22211   }
22212   return sample;
22213 }
22214 
22215 
/* Prunes *field_list in place, dropping every field for which sampling the
 * objects in object_list finds no values.
 *
 * Fields whose choice is source qual, feature field or RNA field are always
 * kept without being sampled.  Removed nodes are unlinked and released with
 * FieldTypeFree; *field_list is updated when the head is removed.
 */
static void RemoveFieldsForWhichThereAreNoData (ValNodePtr PNTR field_list, ValNodePtr object_list)
{
  ValNodePtr PNTR link;     /* address of the pointer that refers to the current node */
  ValNodePtr      current;
  AECRSamplePtr   sample;
  Boolean         keep;

  if (field_list == NULL || *field_list == NULL) {
    return;
  }

  link = field_list;
  while ((current = *link) != NULL) {
    if (current->choice == FieldType_source_qual
        || current->choice == FieldType_feature_field
        || current->choice == FieldType_rna_field) {
      keep = TRUE;
    } else {
      sample = GetAECRSampleFromObjectList (object_list, current);
      keep = (Boolean) (sample != NULL && sample->num_found != 0);
      sample = AECRSampleFree (sample);
    }

    if (keep) {
      link = &(current->next);
    } else {
      /* splice the node out; link now refers to its successor */
      *link = current->next;
      current->next = NULL;
      current = FieldTypeFree (current);
    }
  }
}
22251 
22252 
GetAECRExistingTextList(Uint1 field_type,SeqEntryPtr sep,FILE * fp)22253 NLM_EXTERN void GetAECRExistingTextList (Uint1 field_type, SeqEntryPtr sep, FILE *fp)
22254 {
22255   ValNodePtr          object_list, vnp_f, vnp_o;
22256   ValNodePtr          fields = NULL;
22257   BioseqPtr           bsp;
22258   Char                id_buf[255];
22259   CharPtr             txt1 = NULL;
22260 
22261   object_list = GetObjectListForFieldType (field_type, sep);
22262 
22263   /* get a list of the fields that are appropriate for the objects collected */
22264   fields = GetFieldListForFieldType (field_type, sep);
22265 
22266   /* remove fields for which there is no data */
22267   RemoveFieldsForWhichThereAreNoData (&fields, object_list);
22268 
22269   /* add header */
22270   fprintf (fp, "Accession");
22271   for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
22272     txt1 = SummarizeFieldType (vnp_f);
22273     fprintf (fp, "\t%s", txt1);
22274     txt1 = MemFree (txt1);
22275   }
22276   fprintf (fp, "\n");
22277 
22278   for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) {
22279     bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue);
22280     if (bsp == NULL) {
22281       id_buf[0] = 0;
22282     } else {
22283       SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
22284     }
22285     fprintf (fp, "%s", id_buf);
22286     for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
22287       txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL);
22288       fprintf (fp, "\t%s", txt1 == NULL ? "" : txt1);
22289       txt1 = MemFree (txt1);
22290     }
22291     fprintf (fp, "\n");
22292   }
22293 
22294   fields = FieldTypeListFree (fields);
22295 
22296   object_list = FreeObjectList (object_list);
22297 }
22298 
22299 
/* Inserts num_blanks empty cells into row after the last cell that belongs
 * to column group insert_pos.
 *
 * row's first node is the accession cell and is never counted.  Group p
 * occupies num_field_per_pos[p] consecutive cells; if the row ends before
 * group insert_pos is fully walked, the blanks go after the last cell seen.
 */
static void InsertBlanksInRow (ValNodePtr row, Int4 insert_pos, Int4Ptr num_field_per_pos, Int4 num_blanks)
{
  ValNodePtr cell, last_seen, blank;
  Int4       group, walked, remaining;

  /* start just past the accession cell */
  last_seen = row;
  cell = row->next;
  for (group = 0; cell != NULL && group <= insert_pos; group++) {
    walked = 0;
    while (walked < num_field_per_pos[group] && cell != NULL) {
      last_seen = cell;
      cell = cell->next;
      walked++;
    }
  }

  for (remaining = num_blanks; remaining > 0; remaining--) {
    blank = ValNodeNew (NULL);
    blank->next = last_seen->next;
    last_seen->next = blank;
  }
}
22321 
22322 
/* Appends the cells in vals to this_row as column group pos.
 *
 * When vals holds more cells than the group currently has, every row already
 * in text_table is widened with blanks at that group and
 * num_field_per_pos[pos] is raised.  When vals holds fewer, this_row is
 * padded with empty cells so the group width matches.
 */
static void AddListToTabTable (ValNodePtr vals, ValNodePtr text_table, ValNodePtr this_row, Int4 pos, Int4Ptr num_field_per_pos)
{
  Int4       incoming;
  Int4       extra;
  Int4       padding;
  ValNodePtr earlier_row;

  incoming = ValNodeLen (vals);
  extra = incoming - num_field_per_pos[pos];
  if (extra > 0) {
    /* widen this group in all rows added so far */
    earlier_row = text_table;
    while (earlier_row != NULL) {
      InsertBlanksInRow (earlier_row->data.ptrvalue, pos, num_field_per_pos, extra);
      earlier_row = earlier_row->next;
    }
    num_field_per_pos[pos] = incoming;
  }

  ValNodeLink (&this_row, vals);

  for (padding = num_field_per_pos[pos] - incoming; padding > 0; padding--) {
    ValNodeAddPointer (&this_row, 0, NULL);
  }
}
22342 
22343 
/* Starts a new table row: a copy of id as the accession cell, followed by
 * the values of each field in src_field_list.
 *
 * The values are read from the first source descriptor that
 * SeqMgrGetNextDescriptor reports for bsp.  Each field forms one column
 * group; AddListToTabTable keeps group widths consistent with the rows
 * already in text_table.  The new row is returned and is NOT added to
 * text_table here.
 */
static ValNodePtr StartRowWithSourceFields (CharPtr id, BioseqPtr bsp, ValNodePtr src_field_list, Int4Ptr num_field_per_pos, ValNodePtr text_table)
{
  ValNodePtr          new_row = NULL;
  SeqDescPtr          src_desc;
  ValNodePtr          field_vals, field;
  Int4                group = 0;
  SeqMgrDescContext   context;

  /* accession cell */
  ValNodeAddPointer (&new_row, 0, StringSave (id));

  if (src_field_list == NULL) {
    return new_row;
  }

  /* source field cells */
  src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
  field = src_field_list;
  while (field != NULL) {
    field_vals = GetMultipleFieldValuesForObject (OBJ_SEQDESC, src_desc, field, NULL, NULL);
    AddListToTabTable (field_vals, text_table, new_row, group, num_field_per_pos);
    field = field->next;
    group++;
  }
  return new_row;
}
22365 
22366 
ExportFieldTable(Uint1 field_type,ValNodePtr src_field_list,SeqEntryPtr sep,FILE * fp)22367 NLM_EXTERN void ExportFieldTable (Uint1 field_type, ValNodePtr src_field_list, SeqEntryPtr sep, FILE *fp)
22368 {
22369   ValNodePtr          object_list, vnp_f, vnp_o;
22370   ValNodePtr          fields = NULL;
22371   ValNodePtr          text_table = NULL, text_row;
22372   BioseqPtr           bsp;
22373   Char                id_buf[255];
22374   CharPtr             txt1 = NULL, title;
22375   SeqDescrPtr         pub_sdp;
22376   SeqMgrDescContext   pub_context;
22377   Int4                num_orig_fields;
22378   Int4Ptr             num_field_per_pos;
22379   Int4                pos, i;
22380 
22381   if (field_type == 0) {
22382     object_list = GetObjectListForFieldType (FieldType_source_qual, sep);
22383   } else if (field_type == FieldType_misc) {
22384     object_list = CollectDeflineDescriptors (sep);
22385     ValNodeAddInt (&fields, FieldType_misc, Misc_field_defline);
22386   } else if (field_type == FieldType_pub) {
22387     object_list = GetObjectListForFieldType (FieldType_source_qual, sep);
22388     /* only get publication titles */
22389     ValNodeAddInt (&fields, FieldType_pub, Publication_field_title);
22390   } else {
22391     object_list = GetObjectListForFieldType (field_type, sep);
22392     /* get a list of the fields that are appropriate for the objects collected */
22393     fields = GetFieldListForFieldType (field_type, sep);
22394     /* remove fields for which there is no data */
22395     RemoveFieldsForWhichThereAreNoData (&fields, object_list);
22396   }
22397 
22398   num_orig_fields = ValNodeLen (src_field_list);
22399   num_field_per_pos = (Int4Ptr) MemNew (sizeof (Int4) * num_orig_fields);
22400   for (pos = 0; pos < num_orig_fields; pos++) {
22401     num_field_per_pos[pos] = 1;
22402   }
22403 
22404   /* get text table */
22405   for (vnp_o = object_list; vnp_o != NULL; vnp_o = vnp_o->next) {
22406     bsp = GetSequenceForObject (vnp_o->choice, vnp_o->data.ptrvalue);
22407     if (bsp != NULL) {
22408       /* first column is accession */
22409       SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
22410       if (field_type == FieldType_pub) {
22411         for (pub_sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &pub_context);
22412              pub_sdp != NULL;
22413              pub_sdp = SeqMgrGetNextDescriptor (bsp, pub_sdp, Seq_descr_pub, &pub_context)) {
22414 
22415           /* Get Publication Title */
22416           title = GetFieldValueForObject (OBJ_SEQDESC, pub_sdp, fields, NULL);
22417 
22418           if (!StringHasNoText (title)) {
22419             text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table);
22420 
22421             /* add publication title */
22422             ValNodeAddPointer (&text_row, 0, title);
22423 
22424             /* add row to table */
22425             ValNodeAddPointer (&text_table, 0, text_row);
22426           }
22427           title = MemFree (title);
22428         }
22429       } else {
22430         text_row = StartRowWithSourceFields (id_buf, bsp, src_field_list, num_field_per_pos, text_table);
22431         /* get requested fields */
22432         for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
22433           txt1 = GetFieldValueForObject (vnp_o->choice, vnp_o->data.ptrvalue, vnp_f, NULL);
22434           ValNodeAddPointer (&text_row, 0, txt1);
22435         }
22436         /* add row to table */
22437         ValNodeAddPointer (&text_table, 0, text_row);
22438       }
22439     }
22440   }
22441 
22442   /* add header */
22443   /* accession is first column */
22444   fprintf (fp, "Accession");
22445   /* list source fields first */
22446   for (vnp_f = src_field_list, pos = 0; vnp_f != NULL; vnp_f = vnp_f->next, pos++) {
22447     txt1 = SummarizeFieldType (vnp_f);
22448     for (i = 0; i < num_field_per_pos[pos]; i++) {
22449       fprintf (fp, "\t%s", txt1);
22450     }
22451     txt1 = MemFree (txt1);
22452   }
22453   /* list fields */
22454   for (vnp_f = fields; vnp_f != NULL; vnp_f = vnp_f->next) {
22455     txt1 = SummarizeFieldType (vnp_f);
22456     fprintf (fp, "\t%s", txt1);
22457     txt1 = MemFree (txt1);
22458   }
22459   fprintf (fp, "\n");
22460   WriteTabTableToFile (text_table, fp);
22461   FreeTabTable(text_table);
22462 
22463   fields = FieldTypeListFree (fields);
22464   object_list = FreeObjectList (object_list);
22465   num_field_per_pos = MemFree (num_field_per_pos);
22466 }
22467 
22468 
/* This section handles parsing where the source field and destination field may not be on the same
 * group of objects. */
/* One piece of text extracted as a parse source, together with the place it
 * was taken from.  The Get...SourcesForBioseq collectors in this section
 * fill in bsp plus whichever of sfp / sdp / sip applies and leave the others
 * NULL.  ParseSourceInfoFree releases parse_src_txt and the dest_list nodes
 * only; bsp, sfp, sdp and sip are not freed by it.
 */
typedef struct parsesourceinfo
{
  BioseqPtr   bsp;            /* sequence the text was collected for */
  SeqFeatPtr  sfp;            /* feature the text came from, or NULL */
  SeqDescrPtr sdp;            /* descriptor the text came from, or NULL */
  SeqIdPtr    sip;            /* sequence ID the text came from, or NULL */
  ValNodePtr  dest_list;      /* starts out NULL; nodes freed with ValNodeFree */
  CharPtr     parse_src_txt;  /* extracted text; freed with MemFree */
} ParseSourceInfoData, PNTR ParseSourceInfoPtr;
22480 
ParseSourceInfoNew(BioseqPtr bsp,SeqFeatPtr sfp,SeqDescrPtr sdp,SeqIdPtr sip,CharPtr parse_src_txt)22481 static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt)
22482 {
22483   ParseSourceInfoPtr psip;
22484 
22485   psip = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
22486   if (psip != NULL) {
22487     psip->bsp = bsp;
22488     psip->sdp = sdp;
22489     psip->sfp = sfp;
22490     psip->sip = sip;
22491     psip->dest_list = NULL;
22492     psip->parse_src_txt = parse_src_txt;
22493   }
22494   return psip;
22495 }
22496 
22497 
/* Releases psip together with its dest_list nodes and its parse_src_txt
 * string.  The bsp / sfp / sdp / sip pointers are left untouched.
 * Accepts NULL.  Returns the result of the final MemFree (NULL when psip
 * was already NULL), so callers can assign it back to their pointer.
 */
static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip)
{
  if (psip == NULL)
  {
    return psip;
  }

  psip->dest_list = ValNodeFree (psip->dest_list);
  psip->parse_src_txt = MemFree (psip->parse_src_txt);
  return (ParseSourceInfoPtr) MemFree (psip);
}
22508 
/* Creates a new ParseSourceInfoData that refers to the same bsp, sfp, sdp
 * and sip as psip.  The text and destination list are deliberately NOT
 * carried over: the copy starts with parse_src_txt and dest_list set to
 * NULL.  Returns NULL when psip is NULL or the allocation fails.
 */
static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip)
{
  ParseSourceInfoPtr duplicate;

  if (psip == NULL)
  {
    return NULL;
  }

  duplicate = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
  if (duplicate == NULL)
  {
    return NULL;
  }

  duplicate->bsp = psip->bsp;
  duplicate->sfp = psip->sfp;
  duplicate->sdp = psip->sdp;
  duplicate->sip = psip->sip;
  duplicate->parse_src_txt = NULL;
  duplicate->dest_list = NULL;
  return duplicate;
}
22527 
/* Frees a list whose nodes each hold a ParseSourceInfoPtr: every payload is
 * released with ParseSourceInfoFree and every node with ValNodeFree.
 * Returns the (now NULL) list pointer.
 */
static ValNodePtr ParseSourceListFree (ValNodePtr vnp)
{
  ValNodePtr following;

  for (; vnp != NULL; vnp = following) {
    following = vnp->next;
    /* detach first so only this single node is released */
    vnp->next = NULL;
    vnp->data.ptrvalue = ParseSourceInfoFree (vnp->data.ptrvalue);
    vnp = ValNodeFree (vnp);
  }
  return vnp;
}
22540 
22541 
22542 static void
GetDeflineSourcesForBioseq(BioseqPtr bsp,TextPortionPtr portion,ValNodePtr PNTR source_list)22543 GetDeflineSourcesForBioseq
22544 (BioseqPtr              bsp,
22545  TextPortionPtr         portion,
22546  ValNodePtr PNTR source_list)
22547 {
22548   SeqDescrPtr        sdp;
22549   SeqMgrDescContext  dcontext;
22550   CharPtr            str;
22551   ParseSourceInfoPtr psip;
22552 
22553   if (bsp == NULL || source_list == NULL)
22554   {
22555     return;
22556   }
22557 
22558   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext);
22559   while (sdp != NULL)
22560   {
22561     str = GetTextPortionFromString (sdp->data.ptrvalue, portion);
22562     if (str != NULL) {
22563       psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
22564       if (psip != NULL) {
22565         ValNodeAddPointer (source_list, 0, psip);
22566       } else {
22567         str = MemFree (str);
22568       }
22569     }
22570     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext);
22571   }
22572 }
22573 
22574 
/* Returns a newly allocated string for one sequence ID, or NULL when the ID
 * is not a usable source.
 *
 * NULL is returned when sip is NULL, when its choice differs from id_type,
 * or (general IDs only) when the ID has no Dbtag or tag is non-NULL and does
 * not equal the Dbtag's db.  For general and local IDs the string is the
 * object id's str, or its numeric id printed in decimal when str is NULL.
 * When no object id is available the whole ID is formatted with SeqIdWrite.
 */
static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag)
{
  DbtagPtr    general = NULL;
  ObjectIdPtr obj_id = NULL;
  Char        id_str[128];

  if (sip == NULL || sip->choice != id_type) return NULL;

  if (id_type == SEQID_LOCAL)
  {
    obj_id = sip->data.ptrvalue;
  }
  else if (id_type == SEQID_GENERAL)
  {
    general = (DbtagPtr) sip->data.ptrvalue;
    if (general == NULL) return NULL;
    if (tag != NULL && StringCmp (general->db, tag) != 0) return NULL;
    obj_id = general->tag;
  }

  if (obj_id == NULL)
  {
    SeqIdWrite (sip, id_str, PRINTID_REPORT, sizeof (id_str));
    return StringSave (id_str);
  }

  if (obj_id->str != NULL)
  {
    return StringSave (obj_id->str);
  }

  sprintf (id_str, "%d", obj_id->id);
  return StringSave (id_str);
}
22614 
22615 
22616 static void
GetIDSourcesForBioseq(BioseqPtr bsp,TextPortionPtr portion,Uint1 id_type,CharPtr tag,ValNodePtr PNTR source_list)22617 GetIDSourcesForBioseq
22618 (BioseqPtr       bsp,
22619  TextPortionPtr  portion,
22620  Uint1           id_type,
22621  CharPtr         tag,
22622  ValNodePtr PNTR source_list)
22623 {
22624   SeqIdPtr           sip;
22625   ParseSourceInfoPtr psip;
22626   CharPtr            src_str = NULL, str;
22627 
22628   if (bsp == NULL || source_list == NULL)
22629   {
22630     return;
22631   }
22632 
22633   sip = bsp->id;
22634   while (sip != NULL)
22635   {
22636     if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) {
22637       str = GetTextPortionFromString (src_str, portion);
22638       if (str != NULL) {
22639         psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
22640         if (psip != NULL) {
22641           ValNodeAddPointer (source_list, 0, psip);
22642         } else {
22643           str = MemFree (str);
22644         }
22645       }
22646       src_str = MemFree (src_str);
22647     }
22648     sip = sip->next;
22649   }
22650 }
22651 
22652 
22653 static void
GetLocalIDSourcesForBioseq(BioseqPtr bsp,TextPortionPtr tp,ValNodePtr PNTR source_list)22654 GetLocalIDSourcesForBioseq
22655 (BioseqPtr       bsp,
22656  TextPortionPtr  tp,
22657  ValNodePtr PNTR source_list)
22658 {
22659   GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list);
22660 }
22661 
22662 
/* Collects parse sources from the general IDs of bsp whose database is
 * exactly "NCBIFILE" (GetIDSourcesForBioseq with SEQID_GENERAL and that tag).
 */
static void GetNcbiFileSourceForBioseq (BioseqPtr bsp, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list);
}
22670 
22671 
22672 static void
GetGeneralIdTextSourcesForBioseq(BioseqPtr bsp,Boolean db_only,TextPortionPtr portion,ValNodePtr PNTR source_list)22673 GetGeneralIdTextSourcesForBioseq
22674 (BioseqPtr       bsp,
22675  Boolean         db_only,
22676  TextPortionPtr  portion,
22677  ValNodePtr PNTR source_list)
22678 {
22679   SeqIdPtr           sip;
22680   ParseSourceInfoPtr psip;
22681   DbtagPtr           dbtag;
22682   CharPtr            src_str = NULL, str;
22683 
22684   if (bsp == NULL || source_list == NULL)
22685   {
22686     return;
22687   }
22688 
22689   for (sip = bsp->id; sip != NULL; sip = sip->next) {
22690     if (sip->choice == SEQID_GENERAL && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL) {
22691       if (db_only) {
22692         str = GetTextPortionFromString (dbtag->db, portion);
22693       } else {
22694         src_str = GetDbtagString (dbtag);
22695         str = GetTextPortionFromString (src_str, portion);
22696         src_str = MemFree (src_str);
22697       }
22698       if (str != NULL) {
22699         psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
22700         if (psip != NULL) {
22701           ValNodeAddPointer (source_list, 0, psip);
22702         } else {
22703           str = MemFree (str);
22704         }
22705       }
22706     }
22707   }
22708 }
22709 
22710 
/* Dispatches general-ID source collection according to general_id->choice:
 *   whole_text - text of the whole Dbtag
 *   db         - Dbtag database name only
 *   tag        - object id of general IDs, limited to the database named in
 *                general_id->data.ptrvalue when that string has text
 * Any other choice, or a NULL general_id, collects nothing.
 */
static void GetGeneralIDSourcesForBioseq
(BioseqPtr       bsp,
 ValNodePtr      general_id,
 TextPortionPtr  tp,
 ValNodePtr PNTR source_list)
{
  CharPtr db_filter;

  if (general_id == NULL) return;

  if (general_id->choice == ParseSrcGeneralId_whole_text) {
    GetGeneralIdTextSourcesForBioseq (bsp, FALSE, tp, source_list);
  } else if (general_id->choice == ParseSrcGeneralId_db) {
    GetGeneralIdTextSourcesForBioseq (bsp, TRUE, tp, source_list);
  } else if (general_id->choice == ParseSrcGeneralId_tag) {
    /* a blank database name means "any database" */
    db_filter = StringHasNoText (general_id->data.ptrvalue) ? NULL : general_id->data.ptrvalue;
    GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, db_filter, source_list);
  }
}
22738 
22739 
/* Applies ReplaceStringForParse (with tp) to every "AdditionalComment" field
 * of a "Submission" user object held by the user descriptor sdp.
 * Nothing happens when sdp is NULL or not a user descriptor, when tp is
 * NULL, when the user object is missing or its class is "SMART_V1.0", or
 * when its type is not "Submission".
 */
static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp)
{
  UserObjectPtr      uop;
  ObjectIdPtr        oip;
  UserFieldPtr       ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) return;

  /* Bankit Comments */
  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL || StringCmp (uop->_class, "SMART_V1.0") == 0) return;

  oip = uop->type;
  if (oip == NULL || StringCmp (oip->str, "Submission") != 0) return;

  ufp = uop->data;
  while (ufp != NULL) {
    oip = ufp->label;
    if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
      ReplaceStringForParse (ufp->data.ptrvalue, tp);
    }
    ufp = ufp->next;
  }
}
22764 
22765 
/* Applies ReplaceStringForParse (with tp) to every field labelled
 * comment_field in the structured comment held by the user descriptor sdp.
 * Nothing happens when sdp is NULL or not a user descriptor, when tp is
 * NULL, when comment_field has no text, or when the user object is not a
 * structured comment.
 */
static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp)
{
  UserObjectPtr      uop;
  ObjectIdPtr        label;
  UserFieldPtr       ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (!IsUserObjectStructuredComment (uop)) return;

  ufp = uop->data;
  while (ufp != NULL) {
    label = ufp->label;
    if (label != NULL && StringCmp (label->str, comment_field) == 0) {
      ReplaceStringForParse (ufp->data.ptrvalue, tp);
    }
    ufp = ufp->next;
  }
}
22786 
22787 
22788 static void
GetBankitCommentSourcesForBioseq(BioseqPtr bsp,TextPortionPtr tp,ValNodePtr PNTR source_list)22789 GetBankitCommentSourcesForBioseq
22790 (BioseqPtr       bsp,
22791  TextPortionPtr  tp,
22792  ValNodePtr PNTR source_list)
22793 {
22794   SeqDescrPtr        sdp;
22795   SeqMgrDescContext  dcontext;
22796   ParseSourceInfoPtr psip;
22797   UserObjectPtr      uop;
22798   ObjectIdPtr        oip;
22799   UserFieldPtr       ufp;
22800   CharPtr            str = NULL;
22801 
22802   if (bsp == NULL || source_list == NULL) {
22803     return;
22804   }
22805 
22806   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
22807   while (sdp != NULL) {
22808     if (sdp->extended != 0) {
22809       /* Bankit Comments */
22810       uop = (UserObjectPtr) sdp->data.ptrvalue;
22811       if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) {
22812         oip = uop->type;
22813         if (oip != NULL && StringCmp (oip->str, "Submission") == 0) {
22814           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
22815             oip = ufp->label;
22816             if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
22817               str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
22818               if (str != NULL) {
22819                 psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
22820                 if (psip == NULL) {
22821                   str = MemFree (str);
22822                 } else {
22823                   ValNodeAddPointer (source_list, 0, psip);
22824                 }
22825               }
22826             }
22827           }
22828         }
22829       }
22830     }
22831     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
22832   }
22833 }
22834 
22835 
22836 static void
GetCommentSourcesForBioseq(BioseqPtr bsp,TextPortionPtr tp,ValNodePtr PNTR source_list)22837 GetCommentSourcesForBioseq
22838 (BioseqPtr       bsp,
22839  TextPortionPtr  tp,
22840  ValNodePtr PNTR source_list)
22841 {
22842   SeqDescrPtr        sdp;
22843   SeqFeatPtr         sfp;
22844   SeqMgrFeatContext  fcontext;
22845   SeqMgrDescContext  dcontext;
22846   ParseSourceInfoPtr psip;
22847   CharPtr            str;
22848 
22849   if (bsp == NULL || source_list == NULL) {
22850     return;
22851   }
22852 
22853   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
22854   while (sdp != NULL) {
22855     str = GetTextPortionFromString (sdp->data.ptrvalue, tp);
22856     if (str != NULL) {
22857       psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
22858       if (psip == NULL) {
22859         str = MemFree (str);
22860       } else {
22861         ValNodeAddPointer (source_list, 0, psip);
22862       }
22863     }
22864     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
22865   }
22866 
22867   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext);
22868   while (sfp != NULL) {
22869     str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp);
22870     if (str != NULL) {
22871       psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str);
22872       if (psip == NULL) {
22873         str = MemFree (str);
22874       } else {
22875         ValNodeAddPointer (source_list, 0, psip);
22876       }
22877     }
22878     sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext);
22879   }
22880   GetBankitCommentSourcesForBioseq (bsp, tp, source_list);
22881 }
22882 
22883 
22884 static void
GetStructuredCommentSourcesForBioseq(BioseqPtr bsp,TextPortionPtr tp,CharPtr comment_field,ValNodePtr PNTR source_list)22885 GetStructuredCommentSourcesForBioseq
22886 (BioseqPtr       bsp,
22887  TextPortionPtr  tp,
22888  CharPtr         comment_field,
22889  ValNodePtr PNTR source_list)
22890 {
22891   SeqDescrPtr        sdp;
22892   UserObjectPtr      uop;
22893   ObjectIdPtr        oip;
22894   UserFieldPtr       ufp;
22895   SeqMgrDescContext  dcontext;
22896   CharPtr            str;
22897   ParseSourceInfoPtr psip;
22898 
22899   if (bsp == NULL || source_list == NULL)
22900   {
22901     return;
22902   }
22903 
22904   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
22905   while (sdp != NULL) {
22906     if (sdp->extended != 0
22907         && sdp->data.ptrvalue != NULL) {
22908       uop = (UserObjectPtr) sdp->data.ptrvalue;
22909       if (IsUserObjectStructuredComment (uop)) {
22910         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
22911           oip = ufp->label;
22912           if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
22913             str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
22914             if (str != NULL) {
22915               psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
22916               if (psip == NULL) {
22917                 str = MemFree (str);
22918               } else {
22919                 ValNodeAddPointer (source_list, 0, psip);
22920               }
22921             }
22922           }
22923         }
22924       }
22925     }
22926     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
22927   }
22928 }
22929 
22930 
/* Collects parse sources from the GenBank flat file generated for bsp.
 *
 * Each non-empty paragraph produced by the asn2gnbk job has its spaces
 * compressed and the requested text portion extracted; every successful
 * extraction is appended to *source_list as a ParseSourceInfo that records
 * only bsp (no feature, descriptor or ID).  The error message level is
 * raised to SEV_MAX while the flat file is generated and restored afterwards.
 * Does nothing when bsp or source_list is NULL, or when no SeqEntry is
 * found for bsp.
 */
static void GetFlatFileSourcesForBioseq
(BioseqPtr       bsp,
 TextPortionPtr  tp,
 ValNodePtr PNTR source_list)

{
  SeqEntryPtr        sep;
  Asn2gbJobPtr       job;
  Int4               para_index;
  ErrSev             saved_level;
  CharPtr            paragraph, piece;
  ParseSourceInfoPtr info;

  if (bsp == NULL || source_list == NULL) return;

  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) return;

  saved_level = ErrSetMessageLevel (SEV_MAX);

  job = asn2gnbk_setup (bsp, NULL, NULL, (FmtType)GENBANK_FMT, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0, NULL);
  if (job != NULL) {
    para_index = 0;
    while (para_index < job->numParagraphs) {
      paragraph = asn2gnbk_format (job, (Int4) para_index);
      if (paragraph != NULL && *paragraph != '\0') {
        CompressSpaces (paragraph);
        piece = GetTextPortionFromString (paragraph, tp);
        if (piece != NULL) {
          info = ParseSourceInfoNew (bsp, NULL, NULL, NULL, piece);
          if (info != NULL) {
            ValNodeAddPointer (source_list, 0, info);
          } else {
            piece = MemFree (piece);
          }
        }
      }
      MemFree (paragraph);
      para_index++;
    }
    asn2gnbk_cleanup (job);
  }

  ErrSetMessageLevel (saved_level);
}
22979 
22980 
/* Rank markers recognised by GetTextAfterNomial after the first two words of
 * a taxname.  They are matched as prefixes (StringNCmp over the keyword's
 * own length), so the exact spelling matters -- note that "f. sp. " carries
 * a trailing space while the other entries do not.
 */
const CharPtr nomial_keywords[] = {
"f. sp. ",
"var.",
"pv.",
"bv.",
"serovar",
"subsp." };

/* number of entries in nomial_keywords */
const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr);
22990 
/* Finds where the "nomial" part of a taxname ends.
 *
 * The first two space-separated words are always treated as part of the
 * name.  After that, each time the next word starts with one of
 * nomial_keywords, the keyword and the single word following it are treated
 * as part of the name too.
 *
 * Returns a pointer into taxname itself (not a copy): either the space that
 * follows the last word of the name, or the terminating NUL when a keyword's
 * following word is the last word in the string.  Returns NULL when no
 * second space-delimited boundary is found (fewer than three words).
 */
static CharPtr GetTextAfterNomial (CharPtr taxname)

{
  CharPtr ptr, nomial_end;
  Int4    i;
  Boolean found_keyword = TRUE;

  ptr = StringChr (taxname, ' ');
  if (ptr == NULL) return NULL;
  /* skip over the first word and the spaces after it. */
  while (*ptr == ' ') {
    ptr++;
  }
  ptr = StringChr (ptr, ' ');
  /* if there are only two words, give up. */
  if (ptr == NULL) {
    return NULL;
  }
  /* nomial_end: space right after the second word; ptr: start of the third word */
  nomial_end = ptr;
  while (*ptr == ' ') {
    ptr++;
  }

  /* repeat for as long as a pass consumes a keyword that has more text after it */
  while (found_keyword) {
    found_keyword = FALSE;
    /* if the next word is a nomial keyword, skip that plus the first word that follows it. */
    /* the *nomial_end test stops the scan once the end of the string has been reached */
    for (i = 0; i < num_nomial_keywords && *nomial_end != 0; i++) {
      if (StringNCmp (ptr, nomial_keywords[i], StringLen(nomial_keywords[i])) == 0) {
        ptr += StringLen(nomial_keywords[i]);
        while (*ptr == ' ' ) {
          ptr++;
        }
        nomial_end = StringChr (ptr, ' ');
        if (nomial_end == NULL) {
          /* the word after the keyword is the last one: the name runs to the end */
          nomial_end = ptr + StringLen (ptr);
        } else {
          /* move on to the next word; the remaining keywords are tested against it */
          ptr = nomial_end;
          while (*ptr == ' ') {
            ptr++;
          }
          found_keyword = TRUE;
        }
      }
    }
  }
  return nomial_end;
}
23038 
23039 
23040 static void
GetOrgParseSourcesForBioSource(BioSourcePtr biop,BioseqPtr bsp,SeqDescrPtr sdp,SeqFeatPtr sfp,ParseSrcOrgPtr o,TextPortionPtr tp,ValNodePtr PNTR source_list)23041 GetOrgParseSourcesForBioSource
23042 (BioSourcePtr    biop,
23043  BioseqPtr       bsp,
23044  SeqDescrPtr     sdp,
23045  SeqFeatPtr      sfp,
23046  ParseSrcOrgPtr  o,
23047  TextPortionPtr  tp,
23048  ValNodePtr PNTR source_list)
23049 {
23050   CharPtr str = NULL, portion, tmp;
23051   ValNode vn;
23052   ParseSourceInfoPtr psip;
23053 
23054   if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return;
23055 
23056   switch (o->field->choice) {
23057     case ParseSrcOrgChoice_source_qual :
23058       vn.choice = SourceQualChoice_textqual;
23059       vn.data.intvalue = o->field->data.intvalue;
23060       vn.next = NULL;
23061       str = GetSourceQualFromBioSource (biop, &vn, NULL);
23062       break;
23063     case ParseSrcOrgChoice_taxname_after_binomial :
23064       vn.choice = SourceQualChoice_textqual;
23065       vn.data.intvalue = Source_qual_taxname;
23066       vn.next = NULL;
23067       str = GetSourceQualFromBioSource (biop, &vn, NULL);
23068       tmp = GetTextAfterNomial (str);
23069       tmp = StringSave (tmp);
23070       str = MemFree (str);
23071       str = tmp;
23072       break;
23073   }
23074   portion = GetTextPortionFromString (str, tp);
23075   if (portion != NULL) {
23076     psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion);
23077     if (psip == NULL) {
23078       portion = MemFree (portion);
23079     } else {
23080       ValNodeAddPointer (source_list, 0, psip);
23081     }
23082   }
23083   str = MemFree (str);
23084 }
23085 
23086 
/* GetOrgParseSourcesForBioseq
 * Collects organism parse sources for one Bioseq by visiting its source
 * descriptors and/or its BioSource features, depending on o->type, and
 * handing each BioSource to GetOrgParseSourcesForBioSource.
 */
static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqMgrDescContext  desc_ctx;
  SeqMgrFeatContext  feat_ctx;
  SeqDescrPtr        src_desc;
  SeqFeatPtr         src_feat;

  if (bsp == NULL || o == NULL || source_list == NULL) {
    return;
  }

  /* source descriptors */
  if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor) {
    src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
    while (src_desc != NULL) {
      GetOrgParseSourcesForBioSource (src_desc->data.ptrvalue, bsp, src_desc, NULL, o, tp, source_list);
      src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx);
    }
  }

  /* BioSource features */
  if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature) {
    src_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &feat_ctx);
    while (src_feat != NULL) {
      GetOrgParseSourcesForBioSource (src_feat->data.value.ptrvalue, bsp, NULL, src_feat, o, tp, source_list);
      src_feat = SeqMgrGetNextFeature (bsp, src_feat, SEQFEAT_BIOSRC, 0, &feat_ctx);
    }
  }
}
23112 
23113 
/* Userdata handed to FindParseSourceBioseqCallback. */
typedef struct parsesrccollection {
  ParseSrcPtr src;          /* which kind of source to collect (src->choice selects the collector) */
  TextPortionPtr portion;   /* text portion passed on to each collector */
  ValNodePtr src_list;      /* list the collectors append their results to */
} ParseSrcCollectionData, PNTR ParseSrcCollectionPtr;
23119 
23120 
/* FindParseSourceBioseqCallback
 * Per-Bioseq callback: dispatches on the requested parse source type and
 * appends whatever the matching collector finds to the collection's src_list.
 *
 * bsp       Bioseq being visited; protein Bioseqs are ignored
 * userdata  a ParseSrcCollectionPtr
 */
static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  ParseSrcCollectionPtr collection = (ParseSrcCollectionPtr) userdata;

  /* proteins are rejected here, so no case below needs its own protein check */
  if (bsp == NULL || ISA_aa (bsp->mol) || collection == NULL || collection->src == NULL)
  {
    return;
  }

  switch (collection->src->choice)
  {
    case ParseSrc_defline:
      GetDeflineSourcesForBioseq (bsp, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_flatfile:
      GetFlatFileSourcesForBioseq (bsp, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_local_id:
      /* segmented Bioseqs are skipped for local IDs */
      if (bsp->repr != Seq_repr_seg) {
        GetLocalIDSourcesForBioseq (bsp, collection->portion, &(collection->src_list));
      }
      break;
    case ParseSrc_file_id:
      GetNcbiFileSourceForBioseq (bsp, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_general_id:
      GetGeneralIDSourcesForBioseq (bsp, collection->src->data.ptrvalue, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_org:
      GetOrgParseSourcesForBioseq (bsp, collection->src->data.ptrvalue, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_comment:
      GetCommentSourcesForBioseq (bsp, collection->portion, &(collection->src_list));
      break;
    case ParseSrc_structured_comment:
      GetStructuredCommentSourcesForBioseq(bsp, collection->portion, collection->src->data.ptrvalue, &(collection->src_list));
      break;
    case ParseSrc_bankit_comment:
      GetBankitCommentSourcesForBioseq (bsp, collection->portion, &(collection->src_list));
      break;
  }
}
23170 
23171 
/* GetOrgNamesInRecordCallback
 * Per-BioSource callback that appends the organism's taxname to the ValNode
 * list addressed by userdata (a ValNodePtr PNTR).  The taxname pointer itself
 * is stored, not a copy.  BioSources without a non-blank taxname are skipped.
 */
static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata)
{
  if (biop == NULL || biop->org == NULL)
  {
    return;
  }
  if (StringHasNoText (biop->org->taxname))
  {
    return;
  }
  if (userdata == NULL)
  {
    return;
  }

  ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, biop->org->taxname);
}
23186 
23187 
/* SetToUpper
 * Converts every alphabetic character of a NUL-terminated string to upper
 * case, in place.  A NULL string is ignored.
 *
 * Characters are converted to unsigned char before being passed to the
 * <ctype.h> functions: passing a negative plain char is undefined behaviour.
 */
static void SetToUpper (CharPtr cp)
{
  if (cp == NULL) return;
  for (; *cp != 0; cp++) {
    if (isalpha ((unsigned char) *cp)) {
      *cp = (char) toupper ((unsigned char) *cp);
    }
  }
}
23198 
23199 
/* CapitalizeWords
 * Rewrites a string in place so that the first letter of every word is upper
 * case and the remaining letters are lower case.
 *
 * string  NUL-terminated text to modify; NULL is ignored
 * punc    when TRUE, punctuation starts a new word just like whitespace does
 *
 * A non-alphabetic, non-separator character (a digit, for example) ends the
 * "start of word" state, so a letter directly after it is lower-cased.
 *
 * Characters are converted to unsigned char before being passed to the
 * <ctype.h> functions: passing a negative plain char is undefined behaviour.
 */
static void CapitalizeWords (CharPtr string, Boolean punc)
{
  CharPtr       cp;
  unsigned char ch;
  Boolean       send_upper = TRUE;

  if (string == NULL) {
    return;
  }
  for (cp = string; *cp != 0; cp++) {
    ch = (unsigned char) *cp;
    if (isspace (ch) || (punc && ispunct (ch))) {
      /* separator: the next letter starts a word */
      send_upper = TRUE;
    } else {
      if (isalpha (ch)) {
        *cp = (char) (send_upper ? toupper (ch) : tolower (ch));
      }
      send_upper = FALSE;
    }
  }
}
23225 
23226 
23227 NLM_EXTERN void
FixCapitalizationInString(CharPtr PNTR pTitle,Uint2 capitalization,ValNodePtr org_names)23228 FixCapitalizationInString
23229 (CharPtr PNTR pTitle,
23230  Uint2 capitalization,
23231  ValNodePtr   org_names)
23232 {
23233   if (pTitle == NULL || capitalization == Cap_change_none) return;
23234 
23235   switch (capitalization) {
23236     case Cap_change_tolower:
23237       ResetCapitalization (FALSE, *pTitle);
23238       FixAbbreviationsInElement (pTitle);
23239       FixOrgNamesInString (*pTitle, org_names);
23240       break;
23241     case Cap_change_toupper:
23242       SetToUpper (*pTitle);
23243       FixAbbreviationsInElement (pTitle);
23244       FixOrgNamesInString (*pTitle, org_names);
23245       break;
23246     case Cap_change_firstcap:
23247       ResetCapitalization (TRUE, *pTitle);
23248       FixAbbreviationsInElement (pTitle);
23249       FixOrgNamesInString (*pTitle, org_names);
23250       break;
23251     case Cap_change_firstcaprestnochange:
23252       if (*pTitle != NULL && isalpha (**pTitle)) {
23253         **pTitle = toupper (**pTitle);
23254       }
23255       break;
23256     case Cap_change_firstlower_restnochange:
23257       if (*pTitle != NULL && isalpha (**pTitle)) {
23258         **pTitle = tolower (**pTitle);
23259       }
23260       break;
23261     case Cap_change_cap_word_space:
23262       CapitalizeWords (*pTitle, FALSE);
23263       FixAbbreviationsInElement (pTitle);
23264       FixOrgNamesInString (*pTitle, org_names);
23265       break;
23266     case Cap_change_cap_word_space_punc:
23267       CapitalizeWords (*pTitle, TRUE);
23268       FixAbbreviationsInElement (pTitle);
23269       FixOrgNamesInString (*pTitle, org_names);
23270       break;
23271   }
23272 }
23273 
23274 
/* AddDeflineDestinationsForBioseq
 * Appends every title descriptor found for bsp to *dest_list, tagging each
 * entry with OBJ_SEQDESC.  Does nothing when either argument is NULL.
 */
static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext  title_ctx;
  SeqDescrPtr        title;

  if (bsp == NULL || dest_list == NULL) {
    return;
  }

  for (title = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &title_ctx);
       title != NULL;
       title = SeqMgrGetNextDescriptor (bsp, title, Seq_descr_title, &title_ctx)) {
    ValNodeAddPointer (dest_list, OBJ_SEQDESC, title);
  }
}
23290 
23291 
/* Forward declarations: AddFeatureDestinationsForBioseq calls these before
 * their definitions appear in the file.
 */
static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp);
static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp);
23294 
/* AddFeatureDestinationsForBioseq
 * Appends to *dest_list the features of the type named by featfield->type
 * that belong to bsp.  Protein and nucleotide Bioseqs use different feature
 * collectors.  Does nothing when any argument is NULL.
 */
static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list)
{
  Int4       featdef;
  ValNodePtr matches;

  if (bsp == NULL || featfield == NULL || dest_list == NULL) {
    return;
  }

  featdef = GetFeatdefFromFeatureType (featfield->type);
  matches = ISA_aa (bsp->mol)
          ? GetFeatureListForProteinBioseq (featdef, bsp)
          : GetFeatureListForNucleotideBioseq (featdef, bsp);
  ValNodeLink (dest_list, matches);
}
23309 
23310 
/* GetBioSourceDestinationsForBioseq
 * Appends the BioSource holders of bsp to *dest_list: source descriptors
 * (tagged OBJ_SEQDESC) and/or BioSource features (tagged OBJ_SEQFEAT),
 * according to object_type (an Object_type_constraint_* value).
 * Does nothing when bsp or dest_list is NULL.
 */
static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext  desc_ctx;
  SeqMgrFeatContext  feat_ctx;
  SeqDescrPtr        src_desc;
  SeqFeatPtr         src_feat;

  if (bsp == NULL || dest_list == NULL)
  {
    return;
  }

  if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_descriptor)
  {
    for (src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
         src_desc != NULL;
         src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_ctx))
    {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, src_desc);
    }
  }

  if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_feature)
  {
    for (src_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &feat_ctx);
         src_feat != NULL;
         src_feat = SeqMgrGetNextFeature (bsp, src_feat, SEQFEAT_BIOSRC, 0, &feat_ctx))
    {
      ValNodeAddPointer (dest_list, OBJ_SEQFEAT, src_feat);
    }
  }
}
23343 
23344 
/* AddParseDestinations
 * Fills psip->dest_list with the objects that the parsed text should be
 * written to, according to the destination type in dst->choice.
 *
 * psip  parse source whose dest_list is extended; NULL is ignored
 * dst   destination description; NULL is ignored
 *
 * For ParseDest_org the source's own descriptor is preferred when the
 * constraint allows descriptors, otherwise its own feature when the
 * constraint allows features; only when neither applies are all matching
 * BioSources of psip->bsp collected.  An org destination without a
 * ParseDstOrg payload adds nothing.
 */
static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst)
{
  ParseDstOrgPtr o;

  if (psip == NULL || dst == NULL) return;

  switch (dst->choice) {
    case ParseDest_defline :
      AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list));
      break;
    case ParseDest_org :
      o = (ParseDstOrgPtr) dst->data.ptrvalue;
      if (o == NULL) {
        /* nothing to dispatch on; SetFieldForDestList skips this case too */
        break;
      }
      if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor)
          && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp);
      } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature)
                 && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp);
      } else {
        GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list));
      }
      break;
    case ParseDest_featqual :
      AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, &(psip->dest_list));
      break;
    case ParseDest_comment_descriptor :
      AddCommentDescriptorDestinationsForBioseq (psip->bsp, &(psip->dest_list));
      break;
    case ParseDest_dbxref :
      /* dbxrefs live on BioSources, whatever holds them */
      GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, &(psip->dest_list));
      break;
  }
}
23378 
23379 
SourceHasOneUndeletedDestination(ParseSourceInfoPtr source)23380 static Boolean SourceHasOneUndeletedDestination (ParseSourceInfoPtr source)
23381 {
23382   Int4       num_seen = 0;
23383   ValNodePtr vnp;
23384 
23385   if (source == NULL
23386       || source->dest_list == NULL)
23387   {
23388     return FALSE;
23389   }
23390 
23391   vnp = source->dest_list;
23392   while (vnp != NULL && num_seen < 2)
23393   {
23394     if (vnp->choice > 1)
23395     {
23396       num_seen ++;
23397     }
23398     vnp = vnp->next;
23399   }
23400   if (num_seen == 1)
23401   {
23402     return TRUE;
23403   }
23404   else
23405   {
23406     return FALSE;
23407   }
23408 }
23409 
23410 
/* CombineSourcesForDestinations
 * Makes sure no destination object is targeted by two different sources.
 * Whenever two sources share a destination (same choice and same pointer),
 * their texts are joined as "text1;text2" and the shared destination is kept
 * on only one source (or moved to a newly created source).
 *
 * Bookkeeping used while the lists are being walked:
 *   - a destination node is marked for removal by setting its choice to 0;
 *   - a source node is marked for removal by setting its choice to 1, which
 *     happens when its destination list becomes empty.
 * Marked destinations are dropped after each pair of sources is compared;
 * marked sources are dropped and freed at the very end.
 *
 * source_list  address of the list of ParseSourceInfo entries to reconcile
 */
static void CombineSourcesForDestinations (ValNodePtr PNTR source_list)
{
  ValNodePtr         source1_vnp, source2_vnp, dest1_vnp, dest2_vnp;
  ValNodePtr         source_new, del_vnp;
  ParseSourceInfoPtr psip1, psip2, new_psip;
  CharPtr            comb_txt;

  for (source1_vnp = *source_list;
       source1_vnp != NULL;
       source1_vnp = source1_vnp->next)
  {
    psip1 = (ParseSourceInfoPtr) source1_vnp->data.ptrvalue;
    if (psip1 == NULL || psip1->dest_list == NULL)
    {
      continue;
    }
    /* compare this source against every later source */
    for (source2_vnp = source1_vnp->next;
         source2_vnp != NULL;
         source2_vnp = source2_vnp->next)
    {
      if (source2_vnp->choice > 0)
      {
        /* already marked for deletion */
        continue;
      }
      psip2 = (ParseSourceInfoPtr) source2_vnp->data.ptrvalue;
      if (psip2 == NULL || psip2->dest_list == NULL)
      {
        continue;
      }
      for (dest1_vnp = psip1->dest_list;
           dest1_vnp != NULL;
           dest1_vnp = dest1_vnp->next)
      {
        if (dest1_vnp->choice == 0)
        {
          /* already marked for deletion */
          continue;
        }
        for (dest2_vnp = psip2->dest_list;
             dest2_vnp != NULL;
             dest2_vnp = dest2_vnp->next)
        {
          if (dest2_vnp->choice == 0)
          {
            /* already marked for deletion */
            continue;
          }
          if (dest1_vnp->choice == dest2_vnp->choice
              && dest1_vnp->data.ptrvalue == dest2_vnp->data.ptrvalue)
          {
            /* shared destination: build "text1;text2" (+2 for ';' and NUL);
             * every branch below hands comb_txt over to a source
             */
            comb_txt = (CharPtr) (MemNew (sizeof (Char)
                                  * (StringLen (psip1->parse_src_txt)
                                     + StringLen (psip2->parse_src_txt)
                                     + 2)));
            StringCpy (comb_txt, psip1->parse_src_txt);
            StringCat (comb_txt, ";");
            StringCat (comb_txt, psip2->parse_src_txt);

            /* If the first source has a single destination, then we can
             * add the text from the second source to the first and remove
             * the destination from the second source.
             */
            if (SourceHasOneUndeletedDestination (psip1))
            {

              psip1->parse_src_txt = MemFree (psip1->parse_src_txt);
              psip1->parse_src_txt = comb_txt;
              dest2_vnp->choice = 0;
            }
            /* If the first source has more than one destination and
             * the second source has a single destination, then we can
             * remove the repeated destination from the first source
             * and add the text from the first source to the second source.
             */
            else if (SourceHasOneUndeletedDestination (psip2))
            {
              psip2->parse_src_txt = MemFree (psip2->parse_src_txt);
              psip2->parse_src_txt = comb_txt;
              dest1_vnp->choice = 0;
            }
            /* If the first and second sources have multiple destinations,
             * we need to remove the repeated destination from both the first
             * and second source and create a new source with the combined
             * text for just the repeated destination.
             */
            else
            {
              /* NOTE(review): new_psip is used without a NULL check, although
               * other callers of ParseSourceInfoNew test its result - confirm
               * it cannot fail here.
               */
              new_psip = ParseSourceInfoNew (NULL, NULL, NULL, NULL, comb_txt);
              ValNodeAddPointer (&(new_psip->dest_list),
                                 dest1_vnp->choice,
                                 dest1_vnp->data.ptrvalue);
              dest1_vnp->choice = 0;
              dest2_vnp->choice = 0;
              /* splice the new source in right after the first source */
              source_new = ValNodeNew (NULL);
              source_new->choice = 0;
              source_new->data.ptrvalue = new_psip;
              source_new->next = source1_vnp->next;
              source1_vnp->next = source_new;
            }
          }
        }
      }

      /* drop the destinations marked above; a source left without any
       * destination is itself marked for deletion
       */
      del_vnp = ValNodeExtractList (&(psip1->dest_list), 0);
      del_vnp = ValNodeFree (del_vnp);
      if (psip1->dest_list == NULL)
      {
        source1_vnp->choice = 1;
      }
      del_vnp = ValNodeExtractList (&(psip2->dest_list), 0);
      del_vnp = ValNodeFree (del_vnp);
      if (psip2->dest_list == NULL)
      {
        source2_vnp->choice = 1;
      }
    }
  }

  /* now remove sources deleted */
  del_vnp = ValNodeExtractList (source_list, 1);
  del_vnp = ParseSourceListFree (del_vnp);
}
23534 
23535 
GetPartsForSourceDescriptorOnSegSet(SeqDescrPtr sdp)23536 static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp)
23537 {
23538   ObjValNodePtr ovp;
23539   BioseqSetPtr  bssp;
23540   SeqEntryPtr   sep;
23541 
23542   if (sdp == NULL || sdp->extended != 1) {
23543     return NULL;
23544   }
23545   ovp = (ObjValNodePtr) sdp;
23546   if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) {
23547     return NULL;
23548   }
23549   bssp = (BioseqSetPtr) ovp->idx.parentptr;
23550 
23551   if (bssp->_class == BioseqseqSet_class_nuc_prot
23552       && IS_Bioseq_set (bssp->seq_set)
23553       && bssp->seq_set->data.ptrvalue != NULL) {
23554     bssp = (BioseqSetPtr) bssp->seq_set->data.ptrvalue;
23555   }
23556 
23557   if (bssp->_class == BioseqseqSet_class_segset) {
23558     sep = bssp->seq_set;
23559     while (sep != NULL) {
23560       if (IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL) {
23561         bssp = (BioseqSetPtr) sep->data.ptrvalue;
23562         if (bssp->_class == BioseqseqSet_class_parts) {
23563           return bssp;
23564         }
23565       }
23566       sep = sep->next;
23567     }
23568   }
23569 
23570   return NULL;
23571 }
23572 
23573 
FindSourceDescriptorInSeqEntry(SeqEntryPtr sep)23574 static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep)
23575 {
23576   BioseqPtr    bsp;
23577   BioseqSetPtr bssp;
23578   SeqDescrPtr  sdp = NULL;
23579 
23580   if (sep != NULL && sep->data.ptrvalue != NULL) {
23581     if (IS_Bioseq (sep)) {
23582       bsp = (BioseqPtr) sep->data.ptrvalue;
23583       sdp = bsp->descr;
23584     } else if (IS_Bioseq_set (sep)) {
23585       bssp = (BioseqSetPtr) sep->data.ptrvalue;
23586       sdp = bssp->descr;
23587     }
23588     while (sdp != NULL && sdp->choice != Seq_descr_source)
23589     {
23590       sdp = sdp->next;
23591     }
23592   }
23593   return sdp;
23594 }
23595 
23596 
/* PropagateToSeqEntry
 * Makes a copy of descriptor sdp (via AsnIoMemCopy) and links it onto the
 * descriptor chain of the Bioseq or BioseqSet held by sep.
 *
 * Returns the value AsnIoMemCopy produced, or NULL when sep is NULL, empty,
 * or holds neither a Bioseq nor a BioseqSet (in which case nothing is copied).
 */
static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp)
{
  SeqDescrPtr *chain = NULL;
  SeqDescrPtr  copy;

  if (sep == NULL || sep->data.ptrvalue == NULL) {
    return NULL;
  }

  /* find the descriptor chain to extend */
  if (IS_Bioseq (sep)) {
    chain = &(((BioseqPtr) sep->data.ptrvalue)->descr);
  } else if (IS_Bioseq_set (sep)) {
    chain = &(((BioseqSetPtr) sep->data.ptrvalue)->descr);
  }
  if (chain == NULL) {
    return NULL;
  }

  copy = AsnIoMemCopy ((Pointer) sdp,
                       (AsnReadFunc) SeqDescrAsnRead,
                       (AsnWriteFunc) SeqDescrAsnWrite);
  ValNodeLink (chain, copy);
  return copy;
}
23620 
23621 
/* PropagateSourceOnSegSetForParse
 * For every parse source whose destination is a source descriptor sitting on
 * a segmented set, redirects the destination to the part that the source came
 * from: if that part (the Bioseq equal to psip->bsp) has no source descriptor
 * of its own, the set's descriptor is copied onto it and the copy becomes a
 * destination.  The set-level destination is then marked with choice 0 so the
 * master is no longer a destination.
 */
static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list)
{
  ParseSourceInfoPtr psip;
  ValNodePtr         src_node, dst_node;
  ValNodePtr         added = NULL;
  SeqDescrPtr        master_sdp, part_sdp;
  SeqEntryPtr        part;
  BioseqSetPtr       parts;

  for (src_node = parse_source_list; src_node != NULL; src_node = src_node->next) {
    psip = (ParseSourceInfoPtr) src_node->data.ptrvalue;
    if (psip == NULL) {
      continue;
    }

    for (dst_node = psip->dest_list; dst_node != NULL; dst_node = dst_node->next) {
      if (dst_node->choice != OBJ_SEQDESC) {
        continue;
      }
      master_sdp = (SeqDescrPtr) dst_node->data.ptrvalue;
      if (master_sdp == NULL || master_sdp->choice != Seq_descr_source) {
        continue;
      }
      parts = GetPartsForSourceDescriptorOnSegSet (master_sdp);
      if (parts == NULL) {
        continue;
      }

      /* give the originating part its own copy if it has no source yet */
      for (part = parts->seq_set; part != NULL; part = part->next) {
        if (IS_Bioseq(part) && part->data.ptrvalue == psip->bsp
            && FindSourceDescriptorInSeqEntry (part) == NULL) {
          part_sdp = PropagateToSeqEntry (part, master_sdp);
          ValNodeAddPointer (&added, OBJ_SEQDESC, part_sdp);
        }
      }

      /* set choice to 0 so master won't be a destination */
      dst_node->choice = 0;
    }

    /* add extra destinations to list */
    ValNodeLink (&psip->dest_list, added);
    added = NULL;
  }
}
23664 
23665 
23666 
GetDBxrefFromBioSource(BioSourcePtr biop,CharPtr db_name)23667 NLM_EXTERN CharPtr GetDBxrefFromBioSource (BioSourcePtr biop, CharPtr db_name)
23668 {
23669   CharPtr    rval = NULL;
23670   ValNodePtr vnp;
23671   DbtagPtr   dbtag;
23672 
23673   if (biop == NULL || biop->org == NULL || StringHasNoText (db_name)) {
23674     return NULL;
23675   }
23676   for (vnp = biop->org->db; vnp != NULL && rval == NULL; vnp = vnp->next) {
23677     dbtag = (DbtagPtr) vnp->data.ptrvalue;
23678     if (dbtag != NULL && StringCmp (db_name, dbtag->db) == 0) {
23679       rval = GetObjectIdString (dbtag->tag);
23680     }
23681   }
23682   return rval;
23683 }
23684 
23685 
SetDBxrefForBioSource(BioSourcePtr biop,CharPtr db_name,CharPtr str,Uint2 existing_text)23686 NLM_EXTERN Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text)
23687 {
23688   ValNodePtr    dbx;
23689   DbtagPtr      dbtag;
23690   Boolean       found = FALSE;
23691   Char          buf[20];
23692   Boolean       rval = FALSE;
23693 
23694   if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) {
23695     return FALSE;
23696   }
23697 
23698   if (biop->org == NULL)
23699   {
23700     biop->org = OrgRefNew();
23701   }
23702   dbx = biop->org->db;
23703   while (dbx != NULL && !found)
23704   {
23705     dbtag = (DbtagPtr) dbx->data.ptrvalue;
23706     if (dbtag != NULL && dbtag->tag != NULL
23707         && StringCmp (dbtag->db, db_name) == 0)
23708     {
23709       found = TRUE;
23710     }
23711     if (!found)
23712     {
23713       dbx = dbx->next;
23714     }
23715   }
23716   if (!found)
23717   {
23718     dbtag = DbtagNew();
23719     dbtag->db = StringSave (db_name);
23720     ValNodeAddPointer (&(biop->org->db), 0, dbtag);
23721   }
23722   if (dbtag->tag == NULL)
23723   {
23724     dbtag->tag = ObjectIdNew();
23725   }
23726   /* if it was a number before, make it a string now */
23727   if (dbtag->tag->id > 0 && dbtag->tag->str == NULL)
23728   {
23729     sprintf (buf, "%d", dbtag->tag->id);
23730     dbtag->tag->id = 0;
23731     dbtag->tag->str = StringSave (buf);
23732   }
23733   rval = SetStringValue (&(dbtag->tag->str), str, existing_text);
23734   return rval;
23735 }
23736 
23737 
RemoveDBxrefForBioSource(BioSourcePtr biop,CharPtr db_name,StringConstraintPtr scp)23738 NLM_EXTERN Boolean RemoveDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, StringConstraintPtr scp)
23739 {
23740   ValNodePtr    dbx, prev = NULL, dbx_next;
23741   DbtagPtr      dbtag;
23742   CharPtr       str;
23743   Boolean       found = FALSE;
23744 
23745   if (biop == NULL || StringHasNoText (db_name)) {
23746     return FALSE;
23747   }
23748 
23749   if (biop->org == NULL)
23750   {
23751     biop->org = OrgRefNew();
23752   }
23753   dbx = biop->org->db;
23754   for (dbx = biop->org->db; dbx != NULL; dbx = dbx_next)
23755   {
23756     dbx_next = dbx->next;
23757     dbtag = (DbtagPtr) dbx->data.ptrvalue;
23758     str = NULL;
23759     if (dbtag != NULL && dbtag->tag != NULL
23760       && StringCmp (dbtag->db, db_name) == 0
23761       && (scp == NULL || ((str = GetDbtagString(dbtag)) != NULL && DoesStringMatchConstraint (str, scp))))
23762     {
23763       if (prev == NULL) {
23764         biop->org->db = dbx->next;
23765       } else {
23766         prev->next = dbx->next;
23767       }
23768       dbx->data.ptrvalue = DbtagFree (dbx->data.ptrvalue);
23769       dbx = ValNodeFree (dbx);
23770       found = TRUE;
23771     }
23772     else
23773     {
23774       prev = dbx;
23775     }
23776     str = MemFree (str);
23777   }
23778   return found;
23779 }
23780 
23781 
SetFieldForDestList(ValNodePtr dest_list,ParseDestPtr field,CharPtr str,Uint2 existing_text)23782 static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text)
23783 {
23784   ValNodePtr vnp;
23785   SeqDescrPtr sdp;
23786   ObjValNodePtr ovp;
23787   CharPtr     cp;
23788   BioSourcePtr biop;
23789   ParseDstOrgPtr o;
23790   FeatureFieldLegalPtr fl;
23791   FeatureField f;
23792   Boolean      was_empty;
23793   Int4         num_succeeded = 0;
23794 
23795   if (dest_list == NULL || field == NULL) return 0;
23796 
23797   switch (field->choice) {
23798     case ParseDest_defline :
23799       for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
23800         if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) {
23801           sdp = (SeqDescrPtr) vnp->data.ptrvalue;
23802           if (sdp->choice == Seq_descr_title) {
23803             cp = sdp->data.ptrvalue;
23804             if (SetStringValue (&cp, str, existing_text)) {
23805               num_succeeded++;
23806             }
23807             sdp->data.ptrvalue = cp;
23808             RemoveAutodefObjectsForDesc(sdp);
23809           }
23810         }
23811       }
23812       break;
23813     case ParseDest_org :
23814       o = (ParseDstOrgPtr) field->data.ptrvalue;
23815       if (o != NULL) {
23816         for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
23817           biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
23818           if (SetSourceQualInBioSource (biop, o->field, NULL, str, existing_text)) {
23819             num_succeeded++;
23820           }
23821         }
23822       }
23823       break;
23824     case ParseDest_featqual:
23825       fl = (FeatureFieldLegalPtr) field->data.ptrvalue;
23826       if (fl != NULL) {
23827         f.type = fl->type;
23828         f.field = ValNodeNew(NULL);
23829         f.field->next = NULL;
23830         f.field->choice = FeatQualChoice_legal_qual;
23831         f.field->data.intvalue = fl->field;
23832         for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
23833           if (SetQualOnFeature (vnp->data.ptrvalue, &f, NULL, str, existing_text)) {
23834             num_succeeded++;
23835           }
23836         }
23837         f.field = ValNodeFree (f.field);
23838       }
23839       break;
23840     case ParseDest_comment_descriptor:
23841       for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
23842         sdp = vnp->data.ptrvalue;
23843         if (StringHasNoText (sdp->data.ptrvalue)) {
23844           was_empty = TRUE;
23845         } else {
23846           was_empty = FALSE;
23847         }
23848         cp = sdp->data.ptrvalue;
23849         if (SetStringValue (&cp, str, existing_text)) {
23850           num_succeeded++;
23851         }
23852         sdp->data.ptrvalue = cp;
23853         if (was_empty) {
23854           ovp = (ObjValNodePtr) sdp;
23855           ovp->idx.deleteme = FALSE;
23856         }
23857       }
23858       break;
23859     case ParseDest_dbxref:
23860       if (!StringHasNoText (field->data.ptrvalue)) {
23861         for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
23862           biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
23863           if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) {
23864             num_succeeded++;
23865           }
23866         }
23867       }
23868       break;
23869   }
23870   return num_succeeded;
23871 }
23872 
23873 
23874 
/* AddToSampleForDestList
 * Reads the current value of the field described by `field` from every object
 * in dest_list and adds it to `sample` via AddTextToAECRSample.
 *
 * sample     sample being accumulated
 * dest_list  destination objects (choice = object type, data = object)
 * field      destination description; field->choice selects the handling
 *
 * Does nothing when any argument is NULL.  Each value passed to
 * AddTextToAECRSample is a newly obtained string (a StringSave copy or a
 * getter result).
 */
static void AddToSampleForDestList (AECRSamplePtr sample, ValNodePtr dest_list, ParseDestPtr field)
{
  ValNodePtr vnp;
  SeqDescrPtr sdp;
  BioSourcePtr biop;
  ParseDstOrgPtr o;
  FeatureFieldLegalPtr fl;
  FeatureField f;

  if (dest_list == NULL || field == NULL || sample == NULL) return;

  switch (field->choice) {
    case ParseDest_defline :
      /* only title descriptors contribute */
      for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) {
          sdp = (SeqDescrPtr) vnp->data.ptrvalue;
          if (sdp->choice == Seq_descr_title) {
            AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue));
          }
        }
      }
      break;
    case ParseDest_org :
      o = (ParseDstOrgPtr) field->data.ptrvalue;
      if (o != NULL) {
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
          AddTextToAECRSample (sample, GetSourceQualFromBioSource (biop, o->field, NULL));
        }
      }
      break;
    case ParseDest_featqual:
      fl = (FeatureFieldLegalPtr) field->data.ptrvalue;
      if (fl != NULL) {
        /* build a temporary FeatureField naming the legal qualifier */
        f.type = fl->type;
        f.field = ValNodeNew(NULL);
        f.field->next = NULL;
        f.field->choice = FeatQualChoice_legal_qual;
        f.field->data.intvalue = fl->field;
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          AddTextToAECRSample (sample, GetQualFromFeature (vnp->data.ptrvalue, &f, NULL));
        }
        f.field = ValNodeFree (f.field);
      }
      break;
    case ParseDest_comment_descriptor:
      for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
        sdp = (SeqDescrPtr) vnp->data.ptrvalue;
        if (sdp == NULL) {
          /* no descriptor to read from; the defline case skips these as well */
          continue;
        }
        AddTextToAECRSample (sample, StringSave (sdp->data.ptrvalue));
      }
      break;
    case ParseDest_dbxref:
      /* field->data.ptrvalue holds the database name */
      if (!StringHasNoText (field->data.ptrvalue)) {
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
          AddTextToAECRSample (sample, GetDBxrefFromBioSource (biop, field->data.ptrvalue));
        }
      }
      break;
  }
}
23936 
23937 
GetParseBioSourceField(ValNodePtr field,BioSourcePtr biop)23938 static CharPtr GetParseBioSourceField (ValNodePtr field, BioSourcePtr biop)
23939 {
23940   CharPtr str = NULL;
23941 
23942   if (field == NULL || biop == NULL) {
23943     return NULL;
23944   }
23945 
23946   if (field->choice == ParseSrcOrgChoice_source_qual) {
23947     str = GetSourceQualFromBioSource (biop, field, NULL);
23948   } else if (field->choice == ParseSrcOrgChoice_taxname_after_binomial) {
23949     if (biop->org != NULL) {
23950       str = StringSave (GetTextAfterNomial (biop->org->taxname));
23951     }
23952   }
23953   return str;
23954 }
23955 
23956 
SetParseBioSourceField(ValNodePtr field,CharPtr str,BioSourcePtr biop)23957 static Boolean SetParseBioSourceField (ValNodePtr field, CharPtr str, BioSourcePtr biop)
23958 {
23959   Boolean rval = FALSE;
23960   CharPtr after, new_val;
23961   Int4    len, new_len;
23962 
23963   if (field == NULL || biop == NULL) {
23964     return FALSE;
23965   }
23966 
23967   if (field->choice == ParseSrcOrgChoice_source_qual) {
23968     rval = SetSourceQualInBioSource (biop, field, NULL, str, ExistingTextOption_replace_old);
23969   } else if (field->choice == ParseSrcOrgChoice_taxname_after_binomial) {
23970     if (biop->org != NULL) {
23971       after = GetTextAfterNomial (biop->org->taxname);
23972       len = after - biop->org->taxname;
23973       new_len = len + StringLen (str) + 2;
23974       /* note - do not need to free after, because after is a pointer to a position in biop->org->taxname */
23975       new_val = (CharPtr) MemNew (sizeof (Char) * new_len);
23976       StringNCpy (new_val, biop->org->taxname, len);
23977       new_val[len] = 0;
23978       if (!StringHasNoText (str)) {
23979         StringCat (new_val, " ");
23980         StringCat (new_val, str);
23981       }
23982       biop->org->taxname = MemFree (biop->org->taxname);
23983       biop->org->taxname = new_val;
23984       rval = TRUE;
23985     }
23986   }
23987   return rval;
23988 }
23989 
23990 
/* StripFieldForSrcList
 * Removes the parsed text portion from the source object recorded in psip.
 * field->choice selects which kind of source is edited (defline, organism
 * field, comment, BankIT comment or structured comment); text_portion is
 * handed to the ReplaceStringForParse / Strip...ForParse helpers.
 * Does nothing when any argument is NULL.
 */
static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion)
{
  BioSourcePtr   source = NULL;
  ParseSrcOrgPtr org_field;
  CharPtr        text;

  if (psip == NULL || field == NULL || text_portion == NULL) {
    return;
  }

  switch (field->choice) {
    case ParseSrc_defline :
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_title) {
        ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_org :
      org_field = (ParseSrcOrgPtr) field->data.ptrvalue;
      if (org_field == NULL || org_field->field == NULL) {
        break;
      }
      /* the BioSource may live on a descriptor or on a feature */
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) {
        source = (BioSourcePtr) psip->sdp->data.ptrvalue;
      } else if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) {
        source = (BioSourcePtr) psip->sfp->data.value.ptrvalue;
      }
      if (source == NULL) {
        break;
      }
      /* read the value, strip the parsed portion, write it back */
      text = GetParseBioSourceField (org_field->field, source);
      ReplaceStringForParse (text, text_portion);
      SetParseBioSourceField (org_field->field, text, source);
      text = MemFree (text);
      break;

    case ParseSrc_comment:
      if (psip->sdp != NULL) {
        if (psip->sdp->choice == Seq_descr_user) {
          StripBankitCommentForParse (psip->sdp, text_portion);
        } else if (psip->sdp->choice == Seq_descr_comment) {
          ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion);
        }
      }
      if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_COMMENT) {
        ReplaceStringForParse (psip->sfp->data.value.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_bankit_comment:
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) {
        StripBankitCommentForParse (psip->sdp, text_portion);
      }
      break;

    case ParseSrc_structured_comment:
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) {
        StripStructuredCommentForParse (psip->sdp, field->data.ptrvalue, text_portion);
      }
      break;
  }
}
24046 
24047 
24048 
GetExistingTextForParseAction(ParseActionPtr action,SeqEntryPtr sep)24049 NLM_EXTERN AECRSamplePtr GetExistingTextForParseAction (ParseActionPtr action, SeqEntryPtr sep)
24050 {
24051   ParseSrcCollectionData psd;
24052   ParseSourceInfoPtr     psip;
24053   ValNodePtr             vnp;
24054   ValNodePtr             dest_list = NULL;
24055   AECRSamplePtr          sample;
24056 
24057   if (action == NULL || sep == NULL) return 0;
24058 
24059   psd.src = action->src;
24060   psd.portion = action->portion;
24061   psd.src_list = NULL;
24062 
24063   /* first, we need to get a list of the parse sources */
24064   VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback);
24065 
24066 
24067   /* for each parse source, get a list of the destinations */
24068   for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next)
24069   {
24070     if (vnp->data.ptrvalue == NULL) continue;
24071     psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
24072 
24073     /* find destinations */
24074     AddParseDestinations (psip, action->dest);
24075 
24076     /* add destinations to list */
24077     ValNodeLink (&dest_list, psip->dest_list);
24078     psip->dest_list = NULL;
24079   }
24080 
24081   psd.src_list = ParseSourceListFree (psd.src_list);
24082 
24083   /* get sample for dest_list */
24084   sample = AECRSampleNew ();
24085   AddToSampleForDestList (sample, dest_list, action->dest);
24086   dest_list = ValNodeFree (dest_list);
24087   return sample;
24088 }
24089 
24090 
ApplyParseActionToSeqEntry(ParseActionPtr action,SeqEntryPtr sep)24091 static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep)
24092 {
24093   ParseSrcCollectionData psd;
24094   ParseSourceInfoPtr     psip;
24095   ValNodePtr             orgnames = NULL, source_list_for_removal = NULL, vnp;
24096   Int4                   num_succeeded = 0;
24097 
24098   if (action == NULL || sep == NULL) return 0;
24099 
24100   psd.src = action->src;
24101   psd.portion = action->portion;
24102   psd.src_list = NULL;
24103 
24104   /* first, we need to get a list of the parse sources */
24105   VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback);
24106 
24107   if (action->capitalization != Cap_change_none) {
24108     /* if we will be fixing capitalization, get org names to use in fixes */
24109     VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback);
24110   }
24111 
24112   /* for each parse source, we need to get a list of the destinations */
24113   for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next)
24114   {
24115     if (vnp->data.ptrvalue == NULL) continue;
24116     psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
24117     if (action->remove_from_parsed) {
24118         ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip));
24119     }
24120     /* fix source text */
24121     FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames);
24122     ApplyTextTransformsToString (&(psip->parse_src_txt), action->transform);
24123 
24124     /* find destinations */
24125     AddParseDestinations (psip, action->dest);
24126 
24127   }
24128 
24129   /* free orgname list if we created it */
24130   orgnames = ValNodeFree (orgnames);
24131 
24132   CombineSourcesForDestinations (&(psd.src_list));
24133 
24134   if (action->dest->choice == ParseDest_org) {
24135     PropagateSourceOnSegSetForParse (psd.src_list);
24136   }
24137 
24138   /* now do the parsing */
24139   for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) {
24140     psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
24141     num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text);
24142   }
24143 
24144   /* now remove strings from sources */
24145   for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next)
24146   {
24147     if (vnp->data.ptrvalue == NULL) continue;
24148     psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
24149     StripFieldForSrcList (psip, action->src, action->portion);
24150   }
24151 
24152   psd.src_list = ParseSourceListFree (psd.src_list);
24153   return num_succeeded;
24154 }
24155 
24156 
/* SetCdRegionGeneticCode
 * Assigns a genetic code to a coding-region feature.  The code number is
 * obtained with SeqEntryToGeneticCode from the top parent entry of the
 * Bioseq that the feature location refers to.  A CdRegion is created when
 * the feature has none.  Nothing is assigned when cds is NULL, is not a
 * coding region, or its location Bioseq cannot be found.
 */
static void SetCdRegionGeneticCode (SeqFeatPtr cds)
{
  CdRegionPtr cdregion;
  SeqEntryPtr top_sep;
  BioseqPtr   loc_bsp;
  Int4        code_number;
  ValNodePtr  gcode, id_node;

  if (cds == NULL) return;
  if (cds->data.choice != SEQFEAT_CDREGION) return;

  if (cds->data.value.ptrvalue == NULL) {
    cds->data.value.ptrvalue = CdRegionNew();
  }
  cdregion = (CdRegionPtr) cds->data.value.ptrvalue;

  loc_bsp = BioseqFindFromSeqLoc (cds->location);
  if (loc_bsp == NULL) return;

  top_sep = GetBestTopParentForData (loc_bsp->idx.entityID, loc_bsp);
  code_number = SeqEntryToGeneticCode (top_sep, NULL, NULL, 0);

  /* outer node (choice 254) wraps an inner node (choice 2) holding the
   * numeric code -- NOTE(review): presumably the GeneticCode set / "id"
   * element encoding; confirm against the object definitions
   */
  gcode = ValNodeNew (NULL);
  if (gcode != NULL) {
    id_node = ValNodeNew (NULL);
    gcode->choice = 254;
    gcode->data.ptrvalue = id_node;
    if (id_node != NULL) {
      id_node->choice = 2;
      id_node->data.intvalue = code_number;
    }
  }
  cdregion->genetic_code = gcode;
}
24187 
24188 
/* CreateDataForFeature
 * Fills in the type-specific data of a newly created feature.
 *   sfp          - feature to populate; ignored when NULL
 *   feature_type - macro feature type; mapped to a featdef, which is stored
 *                  in sfp->idx.subtype and selects the data object created
 * Gene, coding-region, RNA and import features get a new data object;
 * coding regions additionally receive a genetic code, RNAs get their type
 * (and class / RNA-gen extension where applicable), and import features get
 * the feature name as their key.  Other feature kinds are left untouched.
 */
static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type)
{
  Int4 featdef, seqfeattype;
  CharPtr    label;
  RnaRefPtr  rrp;
  ImpFeatPtr ifp;

  /* callers pass the result of feature creation directly, which may be NULL */
  if (sfp == NULL) return;

  featdef = GetFeatdefFromFeatureType (feature_type);
  sfp->idx.subtype = featdef;
  seqfeattype = FindFeatFromFeatDefType (featdef);
  switch (seqfeattype) {
    case SEQFEAT_GENE:
      sfp->data.value.ptrvalue = GeneRefNew();
      break;
    case SEQFEAT_CDREGION:
      sfp->data.value.ptrvalue = CdRegionNew();
      SetCdRegionGeneticCode (sfp);
      break;
    case SEQFEAT_RNA:
      rrp = RnaRefNew();
      sfp->data.value.ptrvalue = rrp;
      if (rrp == NULL) break;
      rrp->ext.choice = 0;
      switch (featdef) {
        case FEATDEF_preRNA:
          rrp->type = RNA_TYPE_premsg;
          break;
        case FEATDEF_mRNA:
          rrp->type = RNA_TYPE_mRNA;
          break;
        case FEATDEF_tRNA:
          rrp->type = RNA_TYPE_tRNA;
          break;
        case FEATDEF_rRNA:
          rrp->type = RNA_TYPE_rRNA;
          break;
        case FEATDEF_snRNA:
          /* stored as ncRNA with the class text "snRNA" */
          rrp->type = RNA_TYPE_ncRNA;
          SetncRNAClass (rrp, NULL, "snRNA", ExistingTextOption_replace_old);
          break;
        case FEATDEF_scRNA:
          /* stored as ncRNA with the class text "scRNA" */
          rrp->type = RNA_TYPE_ncRNA;
          SetncRNAClass (rrp, NULL, "scRNA", ExistingTextOption_replace_old);
          break;
        case FEATDEF_tmRNA:
          rrp->type = RNA_TYPE_tmRNA;
          rrp->ext.choice = 3;
          rrp->ext.value.ptrvalue = RNAGenNew ();
          break;
        case FEATDEF_ncRNA:
          rrp->type = RNA_TYPE_ncRNA;
          rrp->ext.choice = 3;
          rrp->ext.value.ptrvalue = RNAGenNew ();
          break;
        case FEATDEF_otherRNA:
          rrp->type = RNA_TYPE_misc_RNA;
          rrp->ext.choice = 3;
          rrp->ext.value.ptrvalue = RNAGenNew ();
          break;
      }
      break;
    case SEQFEAT_IMP:
      ifp = ImpFeatNew();
      sfp->data.value.ptrvalue = ifp;
      if (ifp == NULL) break;
      label = GetFeatureNameFromFeatureType (feature_type);
      ifp->key = StringSave (label);
      break;
  }
}
24261 
24262 
LocationFromApplyFeatureAction(BioseqPtr bsp,ApplyFeatureActionPtr action)24263 static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action)
24264 {
24265   LocationIntervalPtr l;
24266   SeqLocPtr slp = NULL;
24267   Uint1 strand = Seq_strand_plus;
24268   Int4  from, to;
24269 
24270   if (bsp == NULL || action == NULL || action->location == NULL) return NULL;
24271 
24272   if (!action->plus_strand) {
24273     strand = Seq_strand_minus;
24274   }
24275   if (action->location->choice == LocationChoice_interval) {
24276     l = (LocationIntervalPtr) action->location->data.ptrvalue;
24277     if (l != NULL) {
24278       from = MIN (l->from, l->to) - 1;
24279       to = MAX (l->from, l->to) - 1;
24280       slp = SeqLocIntNew (from, to, strand, SeqIdFindWorst (bsp->id));
24281     }
24282     SetSeqLocPartial (slp, action->partial5, action->partial3);
24283   } else if (action->location->choice == LocationChoice_whole_sequence) {
24284     slp = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id));
24285     SetSeqLocPartial (slp, action->partial5, action->partial3);
24286   } else if (action->location->choice == LocationChoice_point) {
24287     AddSeqLocPoint (&slp, SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0))),
24288                     action->location->data.intvalue, FALSE, TRUE, strand);
24289   }
24290   return slp;
24291 }
24292 
24293 
OkToApplyToBioseq(ApplyFeatureActionPtr action,BioseqPtr bsp)24294 static Boolean OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp)
24295 {
24296   SeqFeatPtr sfp;
24297   SeqMgrFeatContext context;
24298   Int4 featdef;
24299   Boolean rval = TRUE;
24300 
24301   if (action == NULL || bsp == NULL) return FALSE;
24302 
24303   if (!action->add_redundant) {
24304     featdef = GetFeatdefFromFeatureType (action->type);
24305     sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
24306     if (sfp != NULL) {
24307       rval = FALSE;
24308     }
24309   }
24310   return rval;
24311 }
24312 
/* AddParts
 * Appends Bioseqs from a parts set to *bsp_list (as OBJ_BIOSEQ nodes).
 * Only acts when action->apply_to_parts is set and parts has class
 * BioseqseqSet_class_parts.  When action->only_seg_num is greater than -1,
 * only the member at that zero-based position is considered; otherwise
 * every member is.  A member is added only if it is a Bioseq that
 * OkToApplyToBioseq accepts.
 */
static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list)
{
  SeqEntryPtr part;
  Int4        idx;

  if (action == NULL || !action->apply_to_parts) return;
  if (parts == NULL || parts->_class != BioseqseqSet_class_parts) return;
  if (bsp_list == NULL) return;

  if (action->only_seg_num > -1) {
    /* step forward to the requested segment, if the set is long enough */
    part = parts->seq_set;
    for (idx = 0; idx < action->only_seg_num && part != NULL; idx++) {
      part = part->next;
    }
    if (part != NULL && IS_Bioseq (part) && OkToApplyToBioseq (action, part->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, part->data.ptrvalue);
    }
    return;
  }

  part = parts->seq_set;
  while (part != NULL) {
    if (IS_Bioseq (part) && OkToApplyToBioseq (action, part->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, part->data.ptrvalue);
    }
    part = part->next;
  }
}
24342 
24343 
/* AddSequenceOrParts
 * Appends to *bsp_list the Bioseq(s) that the apply-feature action should
 * target for bsp, depending on the set that contains bsp:
 *   - parent is a segmented set: with apply_to_parts, the members of the
 *     first Bioseq-set child are added via AddParts; otherwise bsp itself.
 *   - parent is a parts set: with apply_to_parts, the parts are added via
 *     AddParts; otherwise the first member of the set enclosing the parts
 *     set is added, when it is a Bioseq.
 *   - any other parent, or no parent set: bsp itself.
 * Every candidate is added only if OkToApplyToBioseq accepts it.
 * Does nothing when action, bsp or bsp_list is NULL.
 */
static void AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr bssp, parts;
  SeqEntryPtr sep;

  if (action == NULL || bsp == NULL || bsp_list == NULL) return;

  if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) {
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
    if (bssp->_class == BioseqseqSet_class_segset) {
      if (action->apply_to_parts) {
        /* locate the nested set that holds the parts */
        sep = bssp->seq_set;
        while (sep != NULL && !IS_Bioseq_set (sep)) {
          sep = sep->next;
        }
        if (sep != NULL) {
          AddParts (action, sep->data.ptrvalue, bsp_list);
        }
      } else {
        if (OkToApplyToBioseq (action, bsp)) {
          ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
        }
      }
    } else if (bssp->_class == BioseqseqSet_class_parts) {
      if (action->apply_to_parts) {
        AddParts (action, bssp, bsp_list);
      } else {
        parts = bssp;
        if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) {
          bssp = (BioseqSetPtr) parts->idx.parentptr;
          /* add the Bioseq that was just validated (the first member of the
           * enclosing set), not the list pointer itself
           */
          if (bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)
              && OkToApplyToBioseq (action, bssp->seq_set->data.ptrvalue)) {
            ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bssp->seq_set->data.ptrvalue);
          }
        }
      }
    } else {
      if (OkToApplyToBioseq (action, bsp)) {
        ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
      }
    }
  } else {
    if (OkToApplyToBioseq (action, bsp)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
    }
  }
}
24390 
/* AddSequenceOrPartsFromSeqEntry
 * Walks the chain of SeqEntries starting at sep and collects, through
 * AddSequenceOrParts, the Bioseqs an apply-feature action should target:
 *   - a Bioseq entry is offered directly;
 *   - a segmented set offers its first Bioseq member;
 *   - a nuc-prot set offers its first member when that is a Bioseq, or the
 *     first member of that member when it is a segmented set starting with
 *     a Bioseq;
 *   - any other set is processed recursively through its members.
 */
static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr set, nuc_set;
  SeqEntryPtr  member;

  if (action == NULL || sep == NULL) return;

  for (; sep != NULL; sep = sep->next) {
    if (IS_Bioseq (sep)) {
      AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list);
      continue;
    }
    if (!IS_Bioseq_set (sep)) {
      continue;
    }

    set = (BioseqSetPtr) sep->data.ptrvalue;
    switch (set->_class) {
      case BioseqseqSet_class_segset:
        /* find master segment */
        member = set->seq_set;
        while (member != NULL && !IS_Bioseq (member)) {
          member = member->next;
        }
        if (member != NULL) {
          AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
        }
        break;

      case BioseqseqSet_class_nuc_prot:
        /* find nucleotide sequence */
        member = set->seq_set;
        if (member == NULL) {
          break;
        }
        if (IS_Bioseq_set (member)) {
          /* nucleotide is segmented set */
          nuc_set = (BioseqSetPtr) member->data.ptrvalue;
          if (nuc_set != NULL && nuc_set->_class == BioseqseqSet_class_segset
              && nuc_set->seq_set != NULL && IS_Bioseq (nuc_set->seq_set)) {
            AddSequenceOrParts (action, nuc_set->seq_set->data.ptrvalue, bsp_list);
          }
        } else if (IS_Bioseq (member)) {
          AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
        }
        break;

      default:
        /* add from set members */
        AddSequenceOrPartsFromSeqEntry (action, set->seq_set, bsp_list);
        break;
    }
  }
}
24435 
24436 
/* AdjustProteinSequenceForReadingFrame
 * Brings the protein product of a coding region in line with the current
 * translation.  When the product Bioseq cannot be found, the standard CDS
 * creation actions are run instead (if the location Bioseq is found).
 * Otherwise the product sequence data and length are replaced with a fresh
 * translation, and the protein feature is created, or resized when its
 * length no longer matches, with partial flags copied from the CDS location.
 */
static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds)
{
  BioseqPtr    product_bsp, nuc_bsp;
  ByteStorePtr translation;
  SeqFeatPtr   prot_feat;
  Boolean      is_partial5, is_partial3;
  Boolean      refresh_partials = FALSE;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return;

  product_bsp = BioseqFindFromSeqLoc (cds->product);
  if (product_bsp == NULL) {
    /* no product yet: let the normal CDS creation path make one */
    nuc_bsp = BioseqFindFromSeqLoc (cds->location);
    if (nuc_bsp != NULL) {
      ExtraCDSCreationActions (cds, GetBestTopParentForData (nuc_bsp->idx.entityID, nuc_bsp));
    }
    return;
  }

  /* swap in the new translation */
  translation = ProteinFromCdRegionExWithTrailingCodonHandling (cds, TRUE, FALSE, TRUE);
  product_bsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(product_bsp->seq_data));
  product_bsp->seq_data = (SeqDataPtr) translation;
  product_bsp->length = BSLen (translation);

  prot_feat = GetProtFeature (product_bsp);
  if (prot_feat == NULL) {
    prot_feat = CreateNewFeatureOnBioseq (product_bsp, SEQFEAT_PROT, NULL);
    prot_feat->data.value.ptrvalue = ProtRefNew ();
    refresh_partials = TRUE;
  } else if (SeqLocLen (prot_feat->location) != product_bsp->length) {
    /* existing protein feature no longer spans the whole product */
    prot_feat->location = SeqLocFree (prot_feat->location);
    prot_feat->location = SeqLocIntNew (0, product_bsp->length - 1, Seq_strand_plus, SeqIdFindWorst (product_bsp->id));
    refresh_partials = TRUE;
  }

  if (refresh_partials) {
    CheckSeqLocForPartial (cds->location, &is_partial5, &is_partial3);
    SetSeqLocPartial (prot_feat->location, is_partial5, is_partial3);
    prot_feat->partial = (is_partial5 || is_partial3);
  }
}
24479 
24480 
24481 NLM_EXTERN SeqFeatPtr
ApplyOneFeatureToBioseq(BioseqPtr bsp,Uint1 featdef,SeqLocPtr slp,ValNodePtr fields,ValNodePtr src_fields,Boolean add_mrna)24482 ApplyOneFeatureToBioseq
24483 (BioseqPtr bsp,
24484  Uint1 featdef,
24485  SeqLocPtr slp,
24486  ValNodePtr fields,
24487  ValNodePtr src_fields,
24488  Boolean    add_mrna)
24489 {
24490   Int4 seqfeattype;
24491   SeqFeatPtr sfp, gene = NULL, mrna = NULL;
24492   FeatQualLegalValPtr q;
24493   FeatureField f;
24494   ValNodePtr field_vnp;
24495   Int4       feature_type;
24496 
24497   seqfeattype = FindFeatFromFeatDefType (featdef);
24498   sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp);
24499   if (sfp == NULL) return NULL;
24500   feature_type = GetFeatureTypeFromFeatdef(featdef);
24501   CreateDataForFeature (sfp, feature_type);
24502   /* any extra actions */
24503   switch (featdef) {
24504     case FEATDEF_CDS :
24505       ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
24506       break;
24507     case FEATDEF_source :
24508       if (src_fields != NULL) {
24509         sfp->data.value.ptrvalue = ImpFeatFree (sfp->data.value.ptrvalue);
24510         sfp->data.choice = SEQFEAT_BIOSRC;
24511         sfp->data.value.ptrvalue = BioSourceFromSourceQualVals (src_fields);
24512       }
24513       break;
24514   }
24515   for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) {
24516     q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue;
24517     if (q != NULL) {
24518       f.field = ValNodeNew(NULL);
24519       f.field->next = NULL;
24520       f.field->choice = FeatQualChoice_legal_qual;
24521       f.field->data.intvalue = q->qual;
24522       if (sfp->data.choice != SEQFEAT_GENE
24523           && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) {
24524         if (gene == NULL) {
24525           gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp);
24526           CreateDataForFeature (gene, Macro_feature_type_gene);
24527         }
24528         f.type = Macro_feature_type_gene;
24529         SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old);
24530       } else {
24531         f.type = feature_type;
24532         SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old);
24533       }
24534     }
24535   }
24536   if (featdef == FEATDEF_CDS) {
24537     /* retranslate, to account for change in reading frame */
24538     AdjustProteinSequenceForReadingFrame (sfp);
24539     /* after the feature has been created, then adjust it for gaps */
24540     /* Note - this step may result in multiple coding regions being created. */
24541     AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL);
24542     if (add_mrna) {
24543       slp = SeqLocCopy (slp);
24544       mrna = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, slp);
24545       CreateDataForFeature (mrna, Macro_feature_type_mRNA);
24546       for (field_vnp = fields; field_vnp != NULL; field_vnp = field_vnp->next) {
24547         q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue;
24548         if (q != NULL && q->qual == Feat_qual_legal_product) {
24549           f.field = ValNodeNew(NULL);
24550           f.field->next = NULL;
24551           f.field->choice = FeatQualChoice_legal_qual;
24552           f.field->data.intvalue = q->qual;
24553           f.type = Macro_feature_type_mRNA;
24554           SetQualOnFeature (mrna, &f, NULL, q->val, ExistingTextOption_replace_old);
24555         }
24556       }
24557     }
24558   }
24559   return sfp;
24560 }
24561 
24562 
ApplyApplyFeatureActionToSeqEntry(ApplyFeatureActionPtr action,SeqEntryPtr sep)24563 static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep)
24564 {
24565   ValNodePtr bsp_list = NULL, vnp;
24566   Int4       featdef;
24567   BioseqPtr  bsp;
24568   SeqFeatPtr sfp;
24569   SeqLocPtr  slp;
24570   SeqIdPtr   sip;
24571   Int4       num_created = 0;
24572   Int4       len;
24573   CharPtr    list_delimiters = " ,\t;";
24574   CharPtr    cp, tmp;
24575 
24576   if (sep == NULL || action == NULL) return 0;
24577 
24578   /* first, get list of Bioseqs to apply features to */
24579   /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */
24580   if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) {
24581     for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
24582       cp = (CharPtr) vnp->data.ptrvalue;
24583       while (cp != NULL && *cp != 0) {
24584         len = StringCSpn (cp, list_delimiters);
24585         if (len > 0) {
24586           tmp = (CharPtr) MemNew (sizeof (Char) * (len + 1));
24587           StringNCpy (tmp, cp, len);
24588           tmp[len] = 0;
24589           sip = CreateSeqIdFromText (tmp, sep);
24590           bsp = BioseqFind (sip);
24591           if (bsp != NULL) {
24592             AddSequenceOrParts (action, bsp, &bsp_list);
24593           }
24594           cp += len;
24595         }
24596         cp += StringSpn (cp, list_delimiters);
24597       }
24598     }
24599   } else {
24600     AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list);
24601   }
24602 
24603   /* now add feature to each bioseq in list */
24604   for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
24605     bsp = vnp->data.ptrvalue;
24606     if (bsp == NULL) continue;
24607     featdef = GetFeatdefFromFeatureType (action->type);
24608     slp = LocationFromApplyFeatureAction (bsp, action);
24609     sfp = ApplyOneFeatureToBioseq (bsp, featdef, slp, action->fields, action->src_fields, action->add_mrna);
24610     if (sfp != NULL) {
24611       num_created++;
24612     }
24613   }
24614   return num_created;
24615 }
24616 
24617 
/* Working data passed to ConvertAndRemoveFeatureCollectionCallback while
 * visiting features.
 */
typedef struct convertandremovefeaturecollection {
  Uint1 featdef;             /* feature subtype to collect; FEATDEF_ANY accepts every subtype */
  ValNodePtr constraint_set; /* constraints a feature must match to be collected */
  ValNodePtr feature_list;   /* result: one OBJ_SEQFEAT node per collected feature */
} ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr;
24623 
/* ConvertAndRemoveFeatureCollectionCallback
 * Feature-visit callback.  data points to a
 * ConvertAndRemoveFeatureCollectionData; sfp is appended to its
 * feature_list when its subtype matches the requested featdef (or the
 * request is FEATDEF_ANY) and it satisfies the constraint set.
 */
static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data)
{
  ConvertAndRemoveFeatureCollectionPtr collection;

  if (sfp == NULL || data == NULL) return;

  collection = (ConvertAndRemoveFeatureCollectionPtr) data;

  /* wrong subtype: nothing to collect */
  if (collection->featdef != FEATDEF_ANY && sfp->idx.subtype != collection->featdef) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collection->constraint_set)) {
    return;
  }
  ValNodeAddPointer (&(collection->feature_list), OBJ_SEQFEAT, sfp);
}
24636 
24637 
ApplyRemoveFeatureActionToSeqEntry(RemoveFeatureActionPtr action,SeqEntryPtr sep)24638 static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep)
24639 {
24640   ConvertAndRemoveFeatureCollectionData d;
24641   ValNodePtr vnp;
24642   SeqFeatPtr sfp;
24643   Int4       num_deleted = 0, num_products_deleted = 0;
24644   BioseqPtr  bsp;
24645 
24646   if (action == NULL) return 0;
24647 
24648   d.featdef = GetFeatdefFromFeatureType (action->type);
24649   d.constraint_set = action->constraint;
24650   d.feature_list = NULL;
24651 
24652   VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
24653   for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
24654     sfp = vnp->data.ptrvalue;
24655     if (sfp != NULL) {
24656       sfp->idx.deleteme = TRUE;
24657       if (sfp->product != NULL && (bsp = BioseqFind(SeqLocId(sfp->product))) != NULL) {
24658         bsp->idx.deleteme = TRUE;
24659         num_products_deleted++;
24660       }
24661       num_deleted ++;
24662     }
24663   }
24664   d.feature_list = ValNodeFree (d.feature_list);
24665   DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
24666   RenormalizeNucProtSets (sep, TRUE);
24667   return num_deleted + num_products_deleted;
24668 }
24669 
24670 
24671 /* functions for converting features */
24672 
/* ApplyConvertFeatureSrcOptions
 * Applies the source-side options of a feature conversion to sfp.
 * Returns FALSE when sfp is NULL, TRUE when there are no options to apply,
 * TRUE after applying CDS options (ConvertFeatureSrcOptions_cds with
 * non-NULL data), and FALSE for any other option choice or missing data.
 */
static Boolean ApplyConvertFeatureSrcOptions (SeqFeatPtr sfp, ValNodePtr src_options, Boolean keep_original)
{
  ConvertFromCDSOptionsPtr cds_opts;

  if (sfp == NULL) return FALSE;
  if (src_options == NULL) return TRUE;

  if (src_options->choice != ConvertFeatureSrcOptions_cds) {
    return FALSE;
  }

  cds_opts = (ConvertFromCDSOptionsPtr) src_options->data.ptrvalue;
  if (cds_opts == NULL) {
    return FALSE;
  }

  ApplyCDSOptionsToFeature (sfp, cds_opts->remove_mRNA, cds_opts->remove_gene, cds_opts->remove_transcript_id, keep_original);
  return TRUE;
}
24690 
/* Signature shared by the feature conversion functions: takes the feature
 * to convert, the destination featdef and the destination options, and
 * returns a Boolean result.
 */
typedef Boolean (*ConvertFeatureFunc) PROTO ((SeqFeatPtr, Int4, ConvertFeatureDstOptionsPtr));
24692 
/* ApplyRNADestinationOptions
 * Applies destination options after a conversion to an RNA feature.  Only
 * acts when the destination is FEATDEF_ncRNA and dst_options carries a
 * non-blank ncRNA class: if that class differs from the one already on sfp,
 * the feature subtype is set to ncRNA and the class is set with the
 * append-with-semicolon existing-text option.
 */
static void ApplyRNADestinationOptions (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
{
  FeatureField class_field;
  CharPtr      current_class;

  /* apply destination options */
  if (featdef_to != FEATDEF_ncRNA
      || dst_options == NULL
      || dst_options->choice != ConvertFeatureDstOptions_ncrna_class
      || StringHasNoText (dst_options->data.ptrvalue)) {
    return;
  }

  /* temporary field descriptor for the ncRNA class qualifier */
  class_field.type = Macro_feature_type_ncRNA;
  class_field.field = ValNodeNew (NULL);
  class_field.field->choice = FeatQualChoice_legal_qual;
  class_field.field->data.intvalue = Feat_qual_legal_ncRNA_class;

  current_class = GetQualFromFeature (sfp, &class_field, NULL);
  if (StringCmp (dst_options->data.ptrvalue, current_class) != 0) {
    sfp->idx.subtype = FEATDEF_ncRNA;
    SetQualOnFeature (sfp, &class_field, NULL, dst_options->data.ptrvalue, ExistingTextOption_append_semi);
  }

  current_class = MemFree (current_class);
  class_field.field = ValNodeFree (class_field.field);
}
24716 
ConvertCDSToRNAFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24717 static Boolean ConvertCDSToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24718 {
24719   Boolean rval;
24720 
24721   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
24722     return FALSE;
24723   }
24724 
24725   rval = ConvertCDSToRNA (sfp, featdef_to);
24726   if (rval) {
24727     ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
24728   }
24729   return rval;
24730 }
24731 
24732 
ConvertGeneToRNAFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24733 static Boolean ConvertGeneToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24734 {
24735   Boolean rval;
24736 
24737   rval = ConvertGeneToRNA (sfp, featdef_to);
24738   if (rval) {
24739     ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
24740   }
24741   return rval;
24742 }
24743 
24744 
ConvertBioSrcToRegionFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24745 static Boolean ConvertBioSrcToRegionFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24746 {
24747   return ConvertBioSrcToRepeatRegion (sfp, featdef_to);
24748 }
24749 
24750 
ConvertCDSToMiscFeatFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24751 static Boolean ConvertCDSToMiscFeatFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24752 {
24753   Boolean rval = FALSE;
24754   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
24755     return FALSE;
24756   }
24757   else if (sfp->pseudo)
24758   {
24759     rval = ConvertOnePseudoCDSToMiscFeatEx (sfp, FALSE);
24760   }
24761   else
24762   {
24763     /* do other here */
24764     rval = ConvertNonPseudoCDSToMiscFeat (sfp, FALSE);
24765   }
24766   return rval;
24767 }
24768 
ConvertImpToProtFuncEx(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24769 static Boolean ConvertImpToProtFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24770 {
24771   return ConvertImpToProtFunc (sfp, featdef_to);
24772 }
24773 
24774 
ConvertProtToImpFuncEx(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24775 static Boolean ConvertProtToImpFuncEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24776 {
24777   return ConvertProtToImpFunc (sfp, featdef_to);
24778 }
24779 
24780 
ConvertProtToProt(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24781 static Boolean ConvertProtToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24782 {
24783   return ConvertProtToProtFunc (sfp, featdef_to);
24784 }
24785 
24786 
ConvertCDSToMatPeptide(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24787 static Boolean ConvertCDSToMatPeptide (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24788 {
24789   return AutoConvertCDSToMiscFeat (sfp, (dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_remove_original) ? FALSE : dst_options->data.boolvalue);
24790 }
24791 
24792 
ConvertImpToRNAFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24793 static Boolean ConvertImpToRNAFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24794 {
24795   RnaRefPtr          rrp;
24796   GBQualPtr          qual, qual_prev = NULL;
24797   Boolean            add_to_comment = FALSE;
24798   CharPtr            old_comment = NULL;
24799 
24800   if (sfp == NULL || sfp->data.choice != SEQFEAT_IMP)
24801   {
24802     return FALSE;
24803   }
24804 
24805   for (qual = sfp->qual; qual != NULL && StringCmp (qual->qual, "product") != 0; qual = qual->next) {
24806     qual_prev = qual;
24807   }
24808   if (qual != NULL) {
24809     old_comment = StringSave (qual->val);
24810     if (qual_prev == NULL) {
24811       sfp->qual = qual->next;
24812     } else {
24813       qual_prev->next = qual->next;
24814     }
24815     qual->next = NULL;
24816     qual = GBQualFree (qual);
24817   } else {
24818     old_comment = sfp->comment;
24819     sfp->comment = NULL;
24820   }
24821 
24822   rrp = RnaRefFromLabel (featdef_to, old_comment, &add_to_comment);
24823 
24824   sfp->data.value.ptrvalue = ImpFeatFree ((ImpFeatPtr) sfp->data.value.ptrvalue);
24825   sfp->data.choice = SEQFEAT_RNA;
24826   sfp->data.value.ptrvalue = (Pointer) rrp;
24827   SetRNAProductString (sfp, NULL, old_comment, ExistingTextOption_replace_old);
24828 
24829   if (add_to_comment) {
24830     SetStringValue (&(sfp->comment), old_comment, ExistingTextOption_append_semi);
24831   }
24832   old_comment = MemFree (old_comment);
24833 
24834   ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
24835 
24836   return TRUE;
24837 }
24838 
24839 
ConvertRegionToImp(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24840 static Boolean ConvertRegionToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24841 {
24842   return ConvertRegionToImpFunc (sfp, featdef_to);
24843 }
24844 
24845 
ConvertImpToImp(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24846 static Boolean ConvertImpToImp (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24847 {
24848   return ConvertImpToImpFunc (sfp, featdef_to);
24849 }
24850 
24851 
ConvertRegionToRNA(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24852 static Boolean ConvertRegionToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24853 {
24854   Boolean rval;
24855   rval = ConvertRegionToRNAFunc (sfp, featdef_to);
24856   if (rval) {
24857     ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
24858   }
24859   return rval;
24860 }
24861 
24862 
ConvertncRNAToMiscBinding(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24863 static Boolean ConvertncRNAToMiscBinding (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24864 {
24865   RnaRefPtr  rrp;
24866   RNAGenPtr  rgp;
24867   ImpFeatPtr ifp;
24868 
24869   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
24870   if (NULL == rrp)
24871     return FALSE;
24872 
24873   if (rrp->ext.choice == 1) {
24874     /* move product to note */
24875     SetStringValue (&(sfp->comment), rrp->ext.value.ptrvalue, ExistingTextOption_append_semi);
24876   } else if (rrp->ext.choice == 3 && (rgp = (RNAGenPtr) rrp->ext.value.ptrvalue) != NULL
24877              && !StringHasNoText (rgp->product)) {
24878     SetStringValue (&(sfp->comment), rgp->product, ExistingTextOption_append_semi);
24879   }
24880   rrp = RnaRefFree (rrp);
24881   sfp->data.choice = SEQFEAT_IMP;
24882   ifp = ImpFeatNew ();
24883   ifp->key = StringSave ("misc_binding");
24884   sfp->data.value.ptrvalue = ifp;
24885   sfp->idx.subtype = 0;
24886 
24887   return TRUE;
24888 }
24889 
24890 
ConvertCommentToMiscFeat(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24891 static Boolean ConvertCommentToMiscFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24892 {
24893   ImpFeatPtr ifp;
24894 
24895   if (sfp == NULL || sfp->data.choice != SEQFEAT_COMMENT || sfp->data.value.ptrvalue != NULL)
24896   {
24897     return FALSE;
24898   }
24899 
24900   ifp = ImpFeatNew ();
24901   if (ifp != NULL) {
24902     ifp->key = StringSave ("misc_feature");
24903     sfp->data.choice = SEQFEAT_IMP;
24904     sfp->data.value.ptrvalue = (Pointer) ifp;
24905     return TRUE;
24906   }
24907   return FALSE;
24908 }
24909 
24910 
ConvertGeneToImpFeat(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24911 static Boolean ConvertGeneToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24912 {
24913   return ConvertGeneToImpFeatFunc (sfp, featdef_to);
24914 }
24915 
24916 
ConvertRNAToImpFeatEx(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24917 static Boolean ConvertRNAToImpFeatEx (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24918 {
24919   CharPtr product = NULL;
24920   ImpFeatPtr ifp;
24921   Uint1      seqfeattype;
24922 
24923   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) {
24924     return FALSE;
24925   }
24926 
24927   seqfeattype = FindFeatFromFeatDefType (featdef_to);
24928   if (seqfeattype != SEQFEAT_IMP) {
24929     return FALSE;
24930   }
24931 
24932   product = GetRNAProductString (sfp, NULL);
24933 
24934   RemoveRNAProductString (sfp, NULL);
24935 
24936   sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
24937 
24938   ifp = ImpFeatNew ();
24939   ifp->key = StringSave (GetImportFeatureName (featdef_to));
24940   sfp->data.choice = SEQFEAT_IMP;
24941   sfp->data.value.ptrvalue = (Pointer) ifp;
24942 
24943   SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi);
24944   product = MemFree (product);
24945   return TRUE;
24946 }
24947 
24948 
ConvertRNAToImpFeat(SeqFeatPtr sfp,Int4 featdef_to)24949 NLM_EXTERN Boolean ConvertRNAToImpFeat (SeqFeatPtr sfp, Int4 featdef_to)
24950 {
24951   return ConvertRNAToImpFeatEx (sfp, featdef_to, NULL);
24952 }
24953 
24954 
ConvertSiteToImpFeat(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24955 static Boolean ConvertSiteToImpFeat (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24956 {
24957   GBQualPtr  gbqual;
24958   ImpFeatPtr ifp;
24959   Int2       sitetype;
24960   CharPtr    str;
24961 
24962   if (sfp == NULL || sfp->data.choice != SEQFEAT_SITE)
24963   {
24964     return FALSE;
24965   }
24966 
24967   ifp = ImpFeatNew ();
24968   if (NULL == ifp)
24969   {
24970     return FALSE;
24971   }
24972 
24973   sitetype = (Int2) sfp->data.value.intvalue;
24974   sfp->data.choice = SEQFEAT_IMP;
24975   sfp->data.value.ptrvalue = (Pointer) ifp;
24976   ifp->key = StringSave (GetImportFeatureName (featdef_to));
24977   str = GetMacroSiteTypeName (MacroSiteTypeFromAsn1SiteType (sitetype));
24978   if (str != NULL) {
24979     gbqual = GBQualNew ();
24980     if (gbqual != NULL) {
24981       gbqual->qual = StringSave ("note");
24982       gbqual->val = StringSave (str);
24983       gbqual->next = sfp->qual;
24984       sfp->qual = gbqual;
24985     }
24986   }
24987   return TRUE;
24988 }
24989 
24990 
ConvertProtToRegion(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)24991 static Boolean ConvertProtToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
24992 {
24993   ProtRefPtr prp;
24994   ValNodePtr vnp;
24995   CharPtr    str;
24996 
24997   if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT)
24998   {
24999     return FALSE;
25000   }
25001   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
25002   if (NULL == prp)
25003   {
25004     return FALSE;
25005   }
25006 
25007   vnp = prp->name;
25008   if (vnp != NULL && vnp->next == NULL) {
25009     str = (CharPtr) vnp->data.ptrvalue;
25010     if (! StringHasNoText (str)) {
25011       vnp->data.ptrvalue = NULL;
25012       sfp->data.value.ptrvalue = ProtRefFree (prp);
25013       sfp->data.choice = SEQFEAT_REGION;
25014       sfp->data.value.ptrvalue = (Pointer) str;
25015     }
25016   }
25017   return TRUE;
25018 }
25019 
25020 
ConvertRegionToProt(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25021 static Boolean ConvertRegionToProt (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25022 {
25023   return ConvertRegionToProtFunc (sfp, featdef_to);
25024 }
25025 
25026 
ConvertToBond(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25027 static Boolean ConvertToBond (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25028 {
25029   SeqLocPtr   slp = NULL;
25030   BioseqPtr   bsp;
25031   SeqEntryPtr sep;
25032   Boolean     no_cds = FALSE;
25033   SeqFeatPtr  new_sfp;
25034   SeqIdPtr    sip;
25035   SeqBondPtr  sbp;
25036   SeqPntPtr   spp;
25037 
25038   if (sfp == NULL || featdef_to != FEATDEF_BOND || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_bond) {
25039     return FALSE;
25040   }
25041 
25042   SeqFeatDataFree (&(sfp->data));
25043   sfp->data.choice = SEQFEAT_BOND;
25044   sfp->data.value.intvalue = Asn1BondTypeFromMacroBondType (dst_options->data.intvalue);
25045 
25046   bsp = BioseqFindFromSeqLoc (sfp->location);
25047 
25048   if (!ISA_aa (bsp->mol))
25049   {
25050     slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
25051     if (no_cds || slp == NULL) {
25052       return FALSE;
25053     }
25054     sfp->location = SeqLocFree (sfp->location);
25055     sfp->location = slp;
25056   }
25057 
25058   if (sfp->location->choice != SEQLOC_BOND) {
25059     sip = SeqLocId (sfp->location);
25060     if (sip != NULL) {
25061       sbp = SeqBondNew ();
25062       if (sbp != NULL) {
25063         slp = ValNodeNew (NULL);
25064         if (slp != NULL) {
25065           slp->choice = SEQLOC_BOND;
25066           slp->data.ptrvalue = (Pointer) sbp;
25067           spp = SeqPntNew ();
25068           if (spp != NULL) {
25069             spp->strand = SeqLocStrand (sfp->location);
25070             spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0)));
25071             spp->point = SeqLocStart (sfp->location);
25072             sbp->a = spp;
25073           }
25074           spp = SeqPntNew ();
25075           if (spp != NULL) {
25076             spp->strand = SeqLocStrand (sfp->location);
25077             spp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (sip, 0)));
25078             spp->point = SeqLocStop (sfp->location);
25079             sbp->b = spp;
25080           }
25081           sfp->location = SeqLocFree (sfp->location);
25082           sfp->location = slp;
25083         }
25084       }
25085     }
25086   }
25087 
25088   sfp->idx.subtype = 0;
25089 
25090   bsp = GetBioseqGivenSeqLoc (slp, sfp->idx.entityID);
25091   if (bsp == NULL) {
25092     return FALSE;
25093   }
25094   sep = SeqMgrGetSeqEntryForData (bsp);
25095   if (sep == NULL) {
25096     return FALSE;
25097   }
25098 
25099   new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
25100   sfp->idx.deleteme = TRUE;
25101   CreateNewFeature (sep, NULL, SEQFEAT_BOND, new_sfp);
25102 
25103   return TRUE;
25104 }
25105 
25106 
ConvertToSite(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25107 static Boolean ConvertToSite (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25108 {
25109   SeqLocPtr   slp = NULL;
25110   BioseqPtr   bsp;
25111   SeqEntryPtr sep;
25112   Boolean     no_cds = FALSE;
25113   SeqFeatPtr  new_sfp;
25114 
25115   if (sfp == NULL || featdef_to != FEATDEF_SITE || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_site) {
25116     return FALSE;
25117   }
25118 
25119   SeqFeatDataFree (&(sfp->data));
25120   sfp->data.choice = SEQFEAT_SITE;
25121   sfp->data.value.intvalue = Asn1SiteTypeFromMacroSiteType (dst_options->data.intvalue);
25122 
25123   bsp = BioseqFindFromSeqLoc (sfp->location);
25124 
25125   if (!ISA_aa (bsp->mol))
25126   {
25127     slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
25128     if (no_cds || slp == NULL) {
25129       return FALSE;
25130     }
25131     sfp->location = SeqLocFree (sfp->location);
25132     sfp->location = slp;
25133   }
25134 
25135   sfp->idx.subtype = 0;
25136 
25137   bsp = GetBioseqGivenSeqLoc (slp, sfp->idx.entityID);
25138   if (bsp == NULL) {
25139     return FALSE;
25140   }
25141   sep = SeqMgrGetSeqEntryForData (bsp);
25142   if (sep == NULL) {
25143     return FALSE;
25144   }
25145 
25146   new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
25147   sfp->idx.deleteme = TRUE;
25148   CreateNewFeature (sep, NULL, SEQFEAT_SITE, new_sfp);
25149 
25150   return TRUE;
25151 }
25152 
25153 
ConvertToRegion(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25154 static Boolean ConvertToRegion (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25155 {
25156   BioseqPtr     bsp;
25157   RegionTypePtr r;
25158   Boolean       create_prot_feats, no_cds = FALSE;
25159   SeqLocPtr     slp;
25160   SeqEntryPtr   sep;
25161   SeqFeatPtr    new_sfp;
25162 
25163   if (sfp == NULL || featdef_to != FEATDEF_REGION || dst_options == NULL || dst_options->choice != ConvertFeatureDstOptions_region || dst_options->data.ptrvalue == NULL) {
25164     return FALSE;
25165   }
25166 
25167   r = (RegionTypePtr) dst_options->data.ptrvalue;
25168   create_prot_feats = !r->create_nucleotide;
25169 
25170   bsp = BioseqFindFromSeqLoc (sfp->location);
25171   if (bsp == NULL) return FALSE;
25172 
25173   if (ISA_aa (bsp->mol))
25174   {
25175     if (create_prot_feats)
25176     {
25177       slp = (SeqLocPtr) AsnIoMemCopy (sfp->location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
25178     }
25179     else
25180     {
25181       slp = FindNucleotideLocationForProteinFeatureConversion (sfp->location);
25182     }
25183     sfp->location = SeqLocFree (sfp->location);
25184     sfp->location = slp;
25185   }
25186   else if (create_prot_feats)
25187   {
25188     slp = GetProteinLocationForNucleotideFeatureConversion (sfp->location, &no_cds);
25189     if (no_cds) {
25190       return FALSE;
25191     }
25192     sfp->location = SeqLocFree (sfp->location);
25193     sfp->location = slp;
25194   }
25195 
25196   bsp = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID);
25197   if (bsp == NULL) {
25198     return FALSE;
25199   }
25200 
25201   sep = SeqMgrGetSeqEntryForData (bsp);
25202   if (sep == NULL) {
25203     return FALSE;
25204   }
25205 
25206   SeqFeatDataFree (&(sfp->data));
25207   sfp->data.choice = SEQFEAT_REGION;
25208   sfp->data.value.ptrvalue = sfp->comment;
25209   sfp->comment = NULL;
25210 
25211   new_sfp = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
25212   sfp->idx.deleteme = TRUE;
25213   CreateNewFeature (sep, NULL, SEQFEAT_REGION, new_sfp);
25214   return TRUE;
25215 }
25216 
25217 
ConvertRNAToRNA(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25218 static Boolean ConvertRNAToRNA (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25219 {
25220   RnaRefPtr  rrp;
25221   Boolean    add_to_comment = FALSE;
25222   CharPtr    product;
25223 
25224   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
25225   if (NULL == rrp) {
25226     return FALSE;
25227   }
25228 
25229   product = GetRNAProductString (sfp, NULL);
25230 
25231   RemoveRNAProductString (sfp, NULL);
25232 
25233   sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
25234 
25235   sfp->data.value.ptrvalue = RnaRefFromLabel (featdef_to, product, &add_to_comment);
25236 
25237   SetRNAProductString (sfp, NULL, product, ExistingTextOption_replace_old);
25238   if (add_to_comment) {
25239     SetStringValue (&(sfp->comment), product, ExistingTextOption_append_semi);
25240   }
25241   product = MemFree (product);
25242 
25243   /* apply destination options */
25244   ApplyRNADestinationOptions (sfp, featdef_to, dst_options);
25245 
25246   sfp->idx.subtype = 0;
25247   return TRUE;
25248 }
25249 
25250 
MiscFeatToCodingRegionConvertFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25251 static Boolean MiscFeatToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25252 {
25253   return ConvertMiscFeatToCodingRegion (sfp);
25254 }
25255 
25256 
mRNAToCodingRegionConvertFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25257 static Boolean mRNAToCodingRegionConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25258 {
25259   return ConvertmRNAToCodingRegion (sfp);
25260 }
25261 
25262 
tRNAToGeneConvertFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25263 static Boolean tRNAToGeneConvertFunc(SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25264 {
25265   return ConverttRNAToGene (sfp);
25266 }
25267 
25268 
MiscFeatToGeneConvertFunc(SeqFeatPtr sfp,Int4 featdef_to,ConvertFeatureDstOptionsPtr dst_options)25269 static Boolean MiscFeatToGeneConvertFunc (SeqFeatPtr sfp, Int4 featdef_to, ConvertFeatureDstOptionsPtr dst_options)
25270 {
25271   return ConvertMiscFeatToGene (sfp);
25272 }
25273 
25274 
/* One row of the feature-conversion table: which source feature type and
 * subtype it applies to, which destination type and subtype it produces,
 * the function that performs the conversion, and a description of what
 * the conversion does.
 */
typedef struct convertfeattable {
  /* source feature type (SEQFEAT_...); 0 is treated as "any" by the lookup */
  Uint2 seqfeat_from;
  /* source feature subtype (FEATDEF_...); FEATDEF_ANY matches any subtype */
  Uint2 featdef_from;
  /* destination feature type (SEQFEAT_...); 0 is treated as "any" by the lookup */
  Uint2 seqfeat_to;
  /* destination feature subtype (FEATDEF_...); FEATDEF_ANY matches any subtype */
  Uint2 featdef_to;
  /* converter to call; NULL means the conversion is reported as unsupported */
  ConvertFeatureFunc func;
  /* description of the conversion */
  CharPtr help_text;
} ConvertFeatTableData, PNTR ConvertFeatTablePtr;
25283 
/* Table of feature conversions.
 *
 * GetConversionFunctionTableLine scans this table from the top and stops
 * at the first row whose four type columns match, so ROW ORDER MATTERS: a
 * specific row only takes effect if it comes before any more general row
 * that also matches.  In the seqfeat columns 0 matches anything; in the
 * featdef columns FEATDEF_ANY matches anything.  A row whose function is
 * NULL makes IsConversionSupported report the conversion as unsupported.
 */
static ConvertFeatTableData conversion_functions[] = {
  { SEQFEAT_CDREGION, FEATDEF_CDS,                SEQFEAT_RNA,    FEATDEF_ANY,
    ConvertCDSToRNAFunc,
    "Delete protein product sequence.\nClear product field if transcript ID removal was requested.\nIf converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note.\nIf converting to other RNA, put label in RNA product." },
  { SEQFEAT_GENE,     FEATDEF_GENE,               SEQFEAT_RNA,    FEATDEF_ANY,
    ConvertGeneToRNAFunc,
    "If converting to tRNA and anticodon value can be parsed from label, set aa value, and add any text that could not be parsed into an anticodon value to the feature note.  If converting to other RNA, put label in RNA product.  Also append gene locus, allele, description, map location, and locus tag to comment (as long as these values are not already in the label and therefore in the RNA product)." },
  { SEQFEAT_BIOSRC,   FEATDEF_BIOSRC,             SEQFEAT_IMP,    FEATDEF_repeat_region,
    ConvertBioSrcToRegionFunc,
    "Creates a repeat_region with mobile_element qualifiers for the transposon and/or insertion sequence qualifiers on the BioSource.  All other BioSource information is discarded." },
  { SEQFEAT_CDREGION, FEATDEF_CDS,                SEQFEAT_IMP,    FEATDEF_misc_feature,
    ConvertCDSToMiscFeatFunc,
    "Copy comment from coding region to new misc_feature and remove product field.  If not pseudo coding region, add product name from protein feature to new misc_feature comment and delete product sequence." },
  { SEQFEAT_IMP,      FEATDEF_ANY,                SEQFEAT_PROT,   FEATDEF_ANY,
    ConvertImpToProtFuncEx,
    "Original feature must be on nucleotide sequence and be contained in coding region location.  Coding region must have product protein sequence.  New feature is created on product protein sequence so that the translated location will be as close as possible to the original nucleotide location (may not be exact because of codon boundaries)." },
  { SEQFEAT_PROT,     FEATDEF_mat_peptide_aa,     SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertProtToImpFuncEx,
    "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n"
    "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n"
    "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n"
    "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." },
  { SEQFEAT_PROT,     FEATDEF_sig_peptide_aa,     SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertProtToImpFuncEx,
    "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n"
    "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n"
    "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n"
    "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." },
  { SEQFEAT_PROT,     FEATDEF_transit_peptide_aa, SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertProtToImpFuncEx,
    "Original feature must be on a protein sequence that is a product of a coding region.\nNew feature will be created on same sequence as coding region.\n"
    "If protein feature has name, this will be saved as /product qualifier on new feature.\nIf protein feature does not have name but does have description, this will be saved as /product qualifier on new feature.\n"
    "EC_number values from the protein feature will be saved as /EC_number qualifiers on the new feature.\nActivity values will be saved as /function qualifiers on the new feature.\n"
    "Db_xref values from the protein feature will be saved as /db_xref qualifers on the new feature." },
  { SEQFEAT_IMP,      FEATDEF_ANY,                SEQFEAT_RNA,    FEATDEF_ANY,
    ConvertImpToRNAFunc,
    "Creates an RNA feature of the specified subtype.  Import feature key is discarded." },
  { SEQFEAT_IMP,      FEATDEF_misc_feature,       SEQFEAT_CDREGION, FEATDEF_CDS,
    MiscFeatToCodingRegionConvertFunc,
    "Use misc_feature comment for coding region product name." },
  { SEQFEAT_IMP,      FEATDEF_misc_feature,       SEQFEAT_GENE, FEATDEF_GENE,
    MiscFeatToGeneConvertFunc,
    "Creates gene with locus value from misc_feature comment." },
  { SEQFEAT_REGION,   FEATDEF_REGION,             SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertRegionToImp,
    "Creates a misc_feature with the region name saved as a /note qualifier." },
  { SEQFEAT_REGION,   FEATDEF_REGION,             SEQFEAT_RNA,    FEATDEF_ANY,
    ConvertRegionToRNA,
    "Creates an RNA feature with the region name as the product name." },
  { SEQFEAT_COMMENT,  FEATDEF_ANY,                SEQFEAT_IMP,    FEATDEF_misc_feature,
    ConvertCommentToMiscFeat,
    "Creates a misc_feature with the same note as the original.  Note - the flatfile display for the feature is the same." },
  { SEQFEAT_GENE,     FEATDEF_GENE,               SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertGeneToImpFeat,
    "Creates an import feature with the gene description and locus prepended to the original comment, separated by semicolons." },
  { SEQFEAT_RNA,      FEATDEF_ANY,                SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertRNAToImpFeatEx,
    "Creates an import feature of the specified subtype and adds the RNA product name to the comment." } ,
  { SEQFEAT_RNA,      FEATDEF_mRNA,               SEQFEAT_CDREGION, FEATDEF_CDS,
    mRNAToCodingRegionConvertFunc,
    "Convert mRNA to coding region, use mRNA product for protein feature" },
  { SEQFEAT_RNA,      FEATDEF_tRNA,               SEQFEAT_GENE,   FEATDEF_GENE,
    tRNAToGeneConvertFunc,
    "Convert tRNA to gene, use tRNA product for gene description" },
  { SEQFEAT_SITE,     FEATDEF_ANY,                SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertSiteToImpFeat,
    "Creates an import feature of the specified subtype with the site type name as a /note qualifier." } ,
  /* NULL function: because this row precedes the general protein -> Region
   * row below, the first-match lookup lands here for mat_peptide sources and
   * IsConversionSupported reports mat_peptide -> Region as unsupported.
   */
  { SEQFEAT_PROT,     FEATDEF_mat_peptide_aa,     SEQFEAT_REGION, FEATDEF_REGION,
    NULL,
    "Creates a Region feature with the protein name as the region name." },
  { SEQFEAT_PROT,     FEATDEF_ANY,     SEQFEAT_REGION, FEATDEF_REGION,
    ConvertProtToRegion,
    "Creates a Region feature with the protein name as the region name." },
  { SEQFEAT_REGION,   FEATDEF_REGION,             SEQFEAT_PROT,   FEATDEF_ANY,
    ConvertRegionToProt,
    "If feature is on nucleotide sequence, will create feature on protein product sequence for overlapping coding region.  Protein name will be region name." },
  { 0,                FEATDEF_ANY,                SEQFEAT_BOND,    FEATDEF_BOND,
    ConvertToBond,
    "Create Bond feature with specified bond type.  Location is a SeqLocBond with a point at the start of the original location and a point at the end of the original location.  All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." },
  { 0,                FEATDEF_ANY,                SEQFEAT_SITE,    FEATDEF_SITE,
    ConvertToSite,
    "Create Site feature with specified site type.  All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." },
  { 0,                FEATDEF_ANY,                SEQFEAT_REGION,    FEATDEF_REGION,
    ConvertToRegion,
    "Create Region feature on nucleotide sequence or protein product sequence of overlapping coding region as specified.  Use comment on feature for region name.\n"
    "All feature ID, partialness, except, comment, product, location, genbank qualifiers, title, citation, experimental evidence, gene xrefs, db xrefs, and pseudo-ness information is discarded." },
  { SEQFEAT_IMP,      FEATDEF_ANY,                SEQFEAT_IMP,    FEATDEF_ANY,
    ConvertImpToImp,
    "Changes type of import feature." },
  { SEQFEAT_RNA,      FEATDEF_ANY,                SEQFEAT_RNA,    FEATDEF_ANY,
    ConvertRNAToRNA,
    "Changes type of RNA feature." },
  /* NOTE(review): with the first-match lookup in
   * GetConversionFunctionTableLine, an ncRNA -> misc_binding request already
   * matches the general RNA -> import row above (ConvertRNAToImpFeatEx), so
   * this row appears never to be selected by that lookup - confirm whether
   * it is meant to come first.
   */
  { SEQFEAT_RNA,      FEATDEF_ncRNA,              SEQFEAT_IMP,    FEATDEF_misc_binding,
    ConvertncRNAToMiscBinding,
    "Changes ncRNA to misc_binding." },
  { SEQFEAT_PROT,     FEATDEF_ANY,                SEQFEAT_PROT,   FEATDEF_ANY,
    ConvertProtToProt,
    "Changes type of protein feature." },
  { SEQFEAT_CDREGION, FEATDEF_CDS,                SEQFEAT_PROT,   FEATDEF_mat_peptide_aa,
    ConvertCDSToMatPeptide,
    "If coding region is overlapped by another coding region, will convert the coding region to a mat-peptide on the overlapping coding region's protein sequence, otherwise if you have checked \"Leave Original Feature\" it will create a mat-peptide with the same protein names and description on the protein sequence for the coding region." }

};
25387 
25388 
25389 static Int4 num_convert_feature_table_lines = sizeof (conversion_functions) / sizeof (ConvertFeatTableData);
25390 
GetConversionFunctionTableLine(Uint2 seqfeat_from,Uint2 featdef_from,Uint2 seqfeat_to,Uint2 featdef_to)25391 static Int4 GetConversionFunctionTableLine (Uint2 seqfeat_from, Uint2 featdef_from, Uint2 seqfeat_to, Uint2 featdef_to)
25392 {
25393   Int4 i, table_line_num = -1;
25394 
25395   for (i = 0; i < num_convert_feature_table_lines && table_line_num == -1; i++)
25396   {
25397     if ((conversion_functions[i].seqfeat_from == 0 || conversion_functions[i].seqfeat_from == seqfeat_from)
25398         && (conversion_functions[i].featdef_from == FEATDEF_ANY || conversion_functions[i].featdef_from == featdef_from)
25399         && (conversion_functions[i].seqfeat_to == 0 || conversion_functions[i].seqfeat_to == seqfeat_to)
25400         && (conversion_functions[i].featdef_to == FEATDEF_ANY || conversion_functions[i].featdef_to == featdef_to))
25401     {
25402       table_line_num = i;
25403     }
25404   }
25405   return table_line_num;
25406 }
25407 
25408 
IsConversionSupported(Uint2 type_from,Uint2 type_to)25409 NLM_EXTERN Boolean IsConversionSupported (Uint2 type_from, Uint2 type_to)
25410 {
25411   Int4 line;
25412   Uint2 featdef_from, featdef_to, seqfeat_from, seqfeat_to;
25413 
25414   featdef_from = GetFeatdefFromFeatureType (type_from);
25415   seqfeat_from = FindFeatFromFeatDefType (featdef_from);
25416   featdef_to = GetFeatdefFromFeatureType (type_to);
25417   seqfeat_to = FindFeatFromFeatDefType (featdef_to);
25418   line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to);
25419   if (line > -1 && conversion_functions[line].func != NULL) {
25420     return TRUE;
25421   } else {
25422     return FALSE;
25423   }
25424 }
25425 
25426 
GetFeatureTextForLogging(SeqFeatPtr sfp)25427 static CharPtr GetFeatureTextForLogging (SeqFeatPtr sfp)
25428 {
25429   ValNode    vn;
25430   Int4       len;
25431   CharPtr    txt = NULL;
25432 
25433   MemSet (&vn, 0, sizeof (ValNode));
25434   vn.choice = OBJ_SEQFEAT;
25435   vn.data.ptrvalue = sfp;
25436   txt = GetDiscrepancyItemText (&vn);
25437   if (txt == NULL) {
25438     txt = StringSave ("(null)");
25439   } else {
25440     len = StringLen (txt);
25441     if (len > 0 && txt[len - 1] == '\n') {
25442       txt[len - 1] = 0;
25443     }
25444   }
25445   return txt;
25446 }
25447 
25448 
ApplyConvertFeatureActionToSeqEntry(ConvertFeatureActionPtr action,SeqEntryPtr sep,FILE * log_fp)25449 static Int4 ApplyConvertFeatureActionToSeqEntry (ConvertFeatureActionPtr action, SeqEntryPtr sep, FILE *log_fp)
25450 {
25451   ConvertAndRemoveFeatureCollectionData d;
25452   ValNodePtr vnp;
25453   SeqFeatPtr sfp, sfp_copy;
25454   Int4       num_affected = 0, table_line;
25455   Uint2      seqfeat_from, featdef_from, seqfeat_to, featdef_to;
25456   /* variables for logging */
25457   CharPtr    txt_old, txt_new;
25458 
25459   if (action == NULL) return 0;
25460 
25461   featdef_from = GetFeatdefFromFeatureType (action->type_from);
25462   seqfeat_from = FindFeatFromFeatDefType(featdef_from);
25463   featdef_to = GetFeatdefFromFeatureType (action->type_to);
25464   seqfeat_to = FindFeatFromFeatDefType (featdef_to);
25465   table_line = GetConversionFunctionTableLine (seqfeat_from, featdef_from, seqfeat_to, featdef_to);
25466   if (table_line < 0 || conversion_functions[table_line].func == NULL) {
25467     return 0;
25468   }
25469 
25470   d.featdef = GetFeatdefFromFeatureType (action->type_from);
25471   d.constraint_set = action->src_feat_constraint;
25472   d.feature_list = NULL;
25473 
25474   VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
25475   if (d.feature_list == NULL) {
25476       return 0;
25477   }
25478   for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
25479     sfp = vnp->data.ptrvalue;
25480     if (sfp != NULL) {
25481       sfp_copy = (SeqFeatPtr) AsnIoMemCopy (sfp, (AsnReadFunc) SeqFeatAsnRead, (AsnWriteFunc) SeqFeatAsnWrite);
25482       /* add subtype value to copy */
25483       sfp_copy->idx.subtype = sfp->idx.subtype;
25484       sfp_copy->next = sfp->next;
25485       sfp->next = sfp_copy;
25486 
25487       if (conversion_functions[table_line].func (sfp_copy, featdef_to, action->dst_options)) {
25488         ApplyConvertFeatureSrcOptions (sfp_copy, action->src_options, action->leave_original);
25489         num_affected ++;
25490         if (!action->leave_original) {
25491           sfp->idx.deleteme = TRUE;
25492         }
25493         if (log_fp != NULL) {
25494           txt_old = GetFeatureTextForLogging (sfp);
25495           txt_new = GetFeatureTextForLogging (sfp_copy);
25496           if (action->leave_original) {
25497             fprintf (log_fp, "Added new feature %s based on %s\n", txt_new, txt_old);
25498           } else {
25499             fprintf (log_fp, "Replaced feature %s with %s\n", txt_old, txt_new);
25500           }
25501           txt_old = MemFree (txt_old);
25502           txt_new = MemFree (txt_new);
25503         }
25504         sfp_copy->idx.subtype = 0;
25505       } else {
25506         sfp_copy->idx.deleteme = TRUE;
25507       }
25508     }
25509   }
25510   DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
25511   RenormalizeNucProtSets (sep, TRUE);
25512   return num_affected;
25513 }
25514 
25515 
25516 /* Functions for editing feature locations */
DoesStrandMatch(Int4 strand_choice,Uint1 strand_val)25517 static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val)
25518 {
25519   Boolean rval = FALSE;
25520 
25521   switch (strand_choice)
25522   {
25523     case Feature_location_strand_from_any:
25524       rval = TRUE;
25525       break;
25526     case Feature_location_strand_from_unknown:
25527       if (strand_val == Seq_strand_unknown)
25528       {
25529         rval = TRUE;
25530       }
25531       break;
25532     case Feature_location_strand_from_plus:
25533       if (strand_val != Seq_strand_minus)
25534       {
25535         rval = TRUE;
25536       }
25537       break;
25538     case Feature_location_strand_from_minus:
25539       if (strand_val == Seq_strand_minus)
25540       {
25541         rval = TRUE;
25542       }
25543       break;
25544     case Feature_location_strand_from_both:
25545       if (strand_val == Seq_strand_both)
25546       {
25547         rval = TRUE;
25548       }
25549       break;
25550   }
25551   return rval;
25552 }
25553 
25554 
GetNewStrandValue(Int4 strand_choice,Uint1 strand_val)25555 static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val)
25556 {
25557   Uint1 rval = Seq_strand_unknown;
25558 
25559   switch (strand_choice)
25560   {
25561     case Feature_location_strand_to_reverse:
25562       switch (strand_val)
25563       {
25564         case Seq_strand_plus:
25565         case Seq_strand_unknown:
25566           rval = Seq_strand_minus;
25567           break;
25568         case Seq_strand_minus:
25569           rval = Seq_strand_plus;
25570           break;
25571         default:
25572           rval = strand_val;
25573           break;
25574       }
25575       break;
25576     case Feature_location_strand_to_unknown:
25577       rval = Seq_strand_unknown;
25578       break;
25579     case Feature_location_strand_to_plus:
25580       rval = Seq_strand_plus;
25581       break;
25582     case Feature_location_strand_to_minus:
25583       rval = Seq_strand_minus;
25584       break;
25585     case Feature_location_strand_to_both:
25586       rval = Seq_strand_both;
25587       break;
25588   }
25589   return rval;
25590 }
25591 
25592 
ConvertLocationStrand(SeqLocPtr slp,Int4 fromStrand,Int4 toStrand)25593 static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand)
25594 {
25595   SeqLocPtr      loc;
25596   PackSeqPntPtr  psp;
25597   SeqBondPtr     sbp;
25598   SeqIntPtr      sinp;
25599   SeqPntPtr      spp;
25600   Boolean        rval = FALSE;
25601   Uint1          strand_orig;
25602 
25603   while (slp != NULL) {
25604     switch (slp->choice) {
25605       case SEQLOC_NULL :
25606         break;
25607       case SEQLOC_EMPTY :
25608       case SEQLOC_WHOLE :
25609         break;
25610       case SEQLOC_INT :
25611         sinp = (SeqIntPtr) slp->data.ptrvalue;
25612         if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand))
25613         {
25614           strand_orig = sinp->strand;
25615           sinp->strand = GetNewStrandValue (toStrand, sinp->strand);
25616           if (strand_orig != sinp->strand) {
25617             rval = TRUE;
25618           }
25619         }
25620         break;
25621       case SEQLOC_PNT :
25622         spp = (SeqPntPtr) slp->data.ptrvalue;
25623         if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
25624         {
25625           strand_orig = spp->strand;
25626           spp->strand = GetNewStrandValue (toStrand, spp->strand);
25627           if (strand_orig != spp->strand) {
25628             rval = TRUE;
25629           }
25630         }
25631         break;
25632       case SEQLOC_PACKED_PNT :
25633         psp = (PackSeqPntPtr) slp->data.ptrvalue;
25634         if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand))
25635         {
25636           strand_orig = psp->strand;
25637           psp->strand = GetNewStrandValue (toStrand, psp->strand);
25638           if (strand_orig != psp->strand) {
25639             rval = TRUE;
25640           }
25641         }
25642         break;
25643       case SEQLOC_PACKED_INT :
25644       case SEQLOC_MIX :
25645       case SEQLOC_EQUIV :
25646         loc = (SeqLocPtr) slp->data.ptrvalue;
25647         while (loc != NULL) {
25648           rval |= ConvertLocationStrand (loc, fromStrand, toStrand);
25649           loc = loc->next;
25650         }
25651         break;
25652       case SEQLOC_BOND :
25653         sbp = (SeqBondPtr) slp->data.ptrvalue;
25654         if (sbp != NULL) {
25655           spp = (SeqPntPtr) sbp->a;
25656           if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
25657           {
25658             strand_orig = spp->strand;
25659             spp->strand = GetNewStrandValue (toStrand, spp->strand);
25660             if (strand_orig != spp->strand) {
25661               rval = TRUE;
25662             }
25663           }
25664           spp = (SeqPntPtr) sbp->b;
25665           if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
25666           {
25667             strand_orig = spp->strand;
25668             spp->strand = GetNewStrandValue (toStrand, spp->strand);
25669             if (strand_orig != spp->strand) {
25670               rval = TRUE;
25671             }
25672           }
25673         }
25674         break;
25675       case SEQLOC_FEAT :
25676         break;
25677       default :
25678         break;
25679     }
25680     slp = slp->next;
25681   }
25682   return rval;
25683 }
25684 
25685 
ApplyEditLocationStrandToSeqFeat(EditLocationStrandPtr edit,SeqFeatPtr sfp)25686 static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp)
25687 {
25688   Boolean rval = FALSE;
25689 
25690   if (edit == NULL || sfp == NULL) {
25691     return FALSE;
25692   }
25693 
25694   rval = ConvertLocationStrand (sfp->location, edit->strand_from, edit->strand_to);
25695   return rval;
25696 }
25697 
25698 
At5EndOfSequence(SeqLocPtr slp,BioseqPtr bsp)25699 static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
25700 {
25701   Uint1 strand;
25702   Int4  start;
25703   Boolean at_end = FALSE;
25704 
25705   if (slp == NULL || bsp == NULL) return FALSE;
25706 
25707   strand = SeqLocStrand (slp);
25708 
25709   if (strand == Seq_strand_minus) {
25710     start = SeqLocStop (slp);
25711     if (start == bsp->length - 1) {
25712       at_end = TRUE;
25713     }
25714   } else {
25715     start = SeqLocStart (slp);
25716     if (start == 0) {
25717       at_end = TRUE;
25718     }
25719   }
25720   return at_end;
25721 }
25722 
25723 
HasGoodStartCodon(SeqFeatPtr sfp)25724 static Boolean HasGoodStartCodon (SeqFeatPtr sfp)
25725 {
25726   ByteStorePtr bs;
25727   CharPtr      prot;
25728   Boolean     has_start = FALSE;
25729 
25730   if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
25731     bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
25732     if (bs != NULL) {
25733       prot = BSMerge (bs, NULL);
25734       bs = BSFree (bs);
25735       if (prot != NULL && *prot == 'M') {
25736         has_start = TRUE;
25737       }
25738       prot = MemFree (prot);
25739     }
25740   }
25741   return has_start;
25742 }
25743 
25744 
ApplyPartial5SetActionToSeqFeat(Partial5SetActionPtr action,SeqFeatPtr sfp)25745 static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp)
25746 {
25747   Boolean      rval = FALSE;
25748   Boolean      make_partial = FALSE;
25749   Uint1        strand;
25750   BioseqPtr    bsp;
25751   CdRegionPtr  crp;
25752   Boolean      partial5, partial3;
25753 
25754   if (action == NULL || sfp == NULL) return FALSE;
25755   bsp = BioseqFindFromSeqLoc (sfp->location);
25756   strand = SeqLocStrand (sfp->location);
25757 
25758   switch (action->constraint) {
25759     case Partial_5_set_constraint_all:
25760       make_partial = TRUE;
25761       break;
25762     case Partial_5_set_constraint_at_end:
25763       make_partial = At5EndOfSequence (sfp->location, bsp);
25764       break;
25765     case Partial_5_set_constraint_bad_start:
25766       make_partial = HasGoodStartCodon (sfp);
25767       break;
25768     case Partial_5_set_constraint_frame_not_one:
25769       if (sfp->data.choice == SEQFEAT_CDREGION
25770           && (crp = sfp->data.value.ptrvalue) != NULL
25771           && crp->frame != 0 && crp->frame != 1) {
25772         make_partial = TRUE;
25773       }
25774       break;
25775   }
25776 
25777   if (make_partial) {
25778     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25779     if (!partial5) {
25780       SetSeqLocPartial (sfp->location, TRUE, partial3);
25781       if (action->extend && bsp != NULL) {
25782         if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) {
25783           ChooseBestFrame (sfp);
25784         }
25785       }
25786       rval = TRUE;
25787     }
25788   }
25789   return rval;
25790 }
25791 
25792 
ApplyClear5PartialToSeqFeat(Int4 action,SeqFeatPtr sfp)25793 static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
25794 {
25795   Boolean rval = FALSE, clear_partial = FALSE;
25796   Boolean partial5, partial3;
25797 
25798   if (sfp == NULL) return FALSE;
25799 
25800   switch (action) {
25801     case Partial_5_clear_constraint_all:
25802       clear_partial = TRUE;
25803       break;
25804     case Partial_5_clear_constraint_not_at_end:
25805       clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
25806       break;
25807     case Partial_5_clear_constraint_good_start:
25808       clear_partial = !HasGoodStartCodon(sfp);
25809       break;
25810   }
25811   if (clear_partial) {
25812     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25813     if (partial5) {
25814       SetSeqLocPartial (sfp->location, FALSE, partial3);
25815       rval = TRUE;
25816     }
25817   }
25818   return rval;
25819 }
25820 
25821 
At3EndOfSequence(SeqLocPtr slp,BioseqPtr bsp)25822 static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
25823 {
25824   Uint1 strand;
25825   Int4  stop;
25826   Boolean at_end = FALSE;
25827 
25828   if (slp == NULL || bsp == NULL) return FALSE;
25829 
25830   strand = SeqLocStrand (slp);
25831 
25832   if (strand == Seq_strand_minus) {
25833     stop = SeqLocStart (slp);
25834     if (stop == 0) {
25835       at_end = TRUE;
25836     }
25837   } else {
25838     stop = SeqLocStop (slp);
25839     if (stop == bsp->length - 1) {
25840       at_end = TRUE;
25841     }
25842   }
25843   return at_end;
25844 }
25845 
25846 
HasGoodStopCodon(SeqFeatPtr sfp)25847 static Boolean HasGoodStopCodon (SeqFeatPtr sfp)
25848 {
25849   ByteStorePtr bs;
25850   CharPtr      prot;
25851   Boolean      has_stop = FALSE;
25852 
25853   if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
25854     bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
25855     if (bs != NULL) {
25856       prot = BSMerge (bs, NULL);
25857       bs = BSFree (bs);
25858       if (prot != NULL && prot[StringLen (prot) - 1] == '*') {
25859         has_stop = TRUE;
25860       }
25861       prot = MemFree (prot);
25862     }
25863   }
25864   return has_stop;
25865 }
25866 
25867 
ApplyPartial3SetActionToSeqFeat(Partial3SetActionPtr action,SeqFeatPtr sfp)25868 static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp)
25869 {
25870   Boolean      rval = FALSE;
25871   Boolean      make_partial = FALSE;
25872   Uint1        strand;
25873   BioseqPtr    bsp;
25874   Boolean      partial5, partial3;
25875 
25876   if (action == NULL || sfp == NULL) return FALSE;
25877   bsp = BioseqFindFromSeqLoc (sfp->location);
25878   strand = SeqLocStrand (sfp->location);
25879 
25880   switch (action->constraint) {
25881     case Partial_3_set_constraint_all:
25882       make_partial = TRUE;
25883       break;
25884     case Partial_3_set_constraint_at_end:
25885       make_partial = At3EndOfSequence (sfp->location, bsp);
25886       break;
25887     case Partial_3_set_constraint_bad_end:
25888       make_partial = HasGoodStopCodon (sfp);
25889       break;
25890   }
25891 
25892   if (make_partial) {
25893     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25894     if (!partial3) {
25895       SetSeqLocPartial (sfp->location, partial5, TRUE);
25896       if (action->extend && bsp != NULL) {
25897         ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
25898       }
25899       rval = TRUE;
25900     }
25901   }
25902   return rval;
25903 }
25904 
25905 
ApplyClear3PartialToSeqFeat(Int4 action,SeqFeatPtr sfp)25906 static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
25907 {
25908   Boolean rval = FALSE, clear_partial = FALSE;
25909   Boolean partial5, partial3;
25910 
25911   if (sfp == NULL) return FALSE;
25912 
25913   switch (action) {
25914     case Partial_3_clear_constraint_all:
25915       clear_partial = TRUE;
25916       break;
25917     case Partial_3_clear_constraint_not_at_end:
25918       clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
25919       break;
25920     case Partial_3_clear_constraint_good_end:
25921       clear_partial = HasGoodStopCodon(sfp);
25922       break;
25923   }
25924   if (clear_partial) {
25925     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25926     if (partial3) {
25927       SetSeqLocPartial (sfp->location, partial5, FALSE);
25928       rval = TRUE;
25929     }
25930   }
25931   return rval;
25932 }
25933 
25934 
ApplyPartialBothSetActionToSeqFeat(PartialBothSetActionPtr action,SeqFeatPtr sfp)25935 static Boolean ApplyPartialBothSetActionToSeqFeat (PartialBothSetActionPtr action, SeqFeatPtr sfp)
25936 {
25937   Boolean      rval = FALSE;
25938   Boolean      make_partial = FALSE;
25939   Uint1        strand;
25940   BioseqPtr    bsp;
25941   Boolean      partial5, partial3;
25942 
25943   if (action == NULL || sfp == NULL) return FALSE;
25944   bsp = BioseqFindFromSeqLoc (sfp->location);
25945   strand = SeqLocStrand (sfp->location);
25946 
25947   switch (action->constraint) {
25948     case Partial_both_set_constraint_all:
25949       make_partial = TRUE;
25950       break;
25951     case Partial_both_set_constraint_at_end:
25952       make_partial = At5EndOfSequence (sfp->location, bsp) && At3EndOfSequence (sfp->location, bsp);
25953       break;
25954   }
25955 
25956   if (make_partial) {
25957     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25958     if (!partial5 || !partial3) {
25959       SetSeqLocPartial (sfp->location, TRUE, TRUE);
25960       if (action->extend && bsp != NULL) {
25961         ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
25962         if (ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) {
25963           ChooseBestFrame (sfp);
25964         }
25965       }
25966       rval = TRUE;
25967     }
25968   }
25969   return rval;
25970 }
25971 
25972 
ApplyClearBothPartialToSeqFeat(Int4 action,SeqFeatPtr sfp)25973 static Boolean ApplyClearBothPartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
25974 {
25975   Boolean rval = FALSE, clear_partial = FALSE;
25976   Boolean partial5, partial3;
25977   BioseqPtr bsp;
25978 
25979   if (sfp == NULL) return FALSE;
25980 
25981   switch (action) {
25982     case Partial_both_clear_constraint_all:
25983       clear_partial = TRUE;
25984       break;
25985     case Partial_both_clear_constraint_not_at_end:
25986       bsp = BioseqFindFromSeqLoc (sfp->location);
25987       clear_partial = !At5EndOfSequence (sfp->location, bsp) && !At3EndOfSequence(sfp->location, bsp);
25988       break;
25989     case Partial_3_clear_constraint_good_end:
25990       clear_partial = !HasGoodStopCodon(sfp);
25991       break;
25992   }
25993   if (clear_partial) {
25994     CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
25995     if (partial5 || partial3) {
25996       SetSeqLocPartial (sfp->location, FALSE, FALSE);
25997       rval = TRUE;
25998     }
25999   }
26000   return rval;
26001 }
26002 
26003 
ApplyConvertLocationToSeqFeat(Int4 convert_location,SeqFeatPtr sfp)26004 static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp)
26005 {
26006   Boolean hasNulls, rval = FALSE;
26007   SeqLocPtr slp;
26008   BioseqPtr bsp;
26009   Boolean   partial5, partial3;
26010 
26011   if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location))== NULL) {
26012     return FALSE;
26013   }
26014 
26015   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
26016     hasNulls = LocationHasNullsBetween (sfp->location);
26017     switch (convert_location)
26018     {
26019       case Convert_location_type_join :
26020         if (hasNulls)
26021         {
26022           slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, FALSE);
26023             sfp->location = SeqLocFree (sfp->location);
26024             sfp->location = slp;
26025             if (bsp->repr == Seq_repr_seg)
26026             {
26027               slp = SegLocToPartsEx (bsp, sfp->location, FALSE);
26028               sfp->location = SeqLocFree (sfp->location);
26029               sfp->location = slp;
26030               hasNulls = LocationHasNullsBetween (sfp->location);
26031               sfp->partial = (sfp->partial || hasNulls);
26032             }
26033             FreeAllFuzz (sfp->location);
26034             SetSeqLocPartial (sfp->location, partial5, partial3);
26035         rval = TRUE;
26036         }
26037         break;
26038       case Convert_location_type_order :
26039         if (!hasNulls)
26040         {
26041             slp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, TRUE);
26042         sfp->location = SeqLocFree (sfp->location);
26043             sfp->location = slp;
26044             if (bsp->repr == Seq_repr_seg)
26045             {
26046               slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
26047               sfp->location = SeqLocFree (sfp->location);
26048               sfp->location = slp;
26049               hasNulls = LocationHasNullsBetween (sfp->location);
26050               sfp->partial = (sfp->partial || hasNulls);
26051             }
26052             FreeAllFuzz (sfp->location);
26053             SetSeqLocPartial (sfp->location, partial5, partial3);
26054         rval = TRUE;
26055         }
26056         break;
26057       case Convert_location_type_merge :
26058       if (sfp->location->choice != SEQLOC_INT) {
26059           slp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
26060           sfp->location = SeqLocFree (sfp->location);
26061           sfp->location = slp;
26062             SetSeqLocPartial (sfp->location, partial5, partial3);
26063         rval = TRUE;
26064       }
26065       default:
26066         break;
26067     }
26068   return rval;
26069 }
26070 
26071 
ExtendSeqFeat5(SeqFeatPtr sfp)26072 static Boolean ExtendSeqFeat5 (SeqFeatPtr sfp)
26073 {
26074   BioseqPtr bsp;
26075   CdRegionPtr crp;
26076   Int4        start_diff;
26077   Boolean     partial5, partial3;
26078 
26079   if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
26080   {
26081     return FALSE;
26082   }
26083 
26084   if ((start_diff = ExtendSeqLocToEnd (sfp->location, bsp, TRUE)) > 0)
26085   {
26086     if (sfp->data.choice == SEQFEAT_CDREGION) {
26087       CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
26088         if (partial5) {
26089         crp = (CdRegionPtr) sfp->data.value.ptrvalue;
26090         if (crp != NULL) {
26091           if (crp->frame == 0) {
26092               crp->frame = 1;
26093           }
26094           crp->frame = (crp->frame + start_diff - 1) % 3 + 1;
26095         }
26096       }
26097     }
26098     return TRUE;
26099   }
26100   else
26101   {
26102     return FALSE;
26103   }
26104 }
26105 
26106 
ExtendSeqFeat3(SeqFeatPtr sfp)26107 static Boolean ExtendSeqFeat3 (SeqFeatPtr sfp)
26108 {
26109   BioseqPtr bsp;
26110   Uint1     strand;
26111   Int4      stop_before, stop_after;
26112 
26113   if (sfp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
26114   {
26115     return FALSE;
26116   }
26117   strand = SeqLocStrand (sfp->location);
26118   if (strand == Seq_strand_minus) {
26119     stop_before = SeqLocStart (sfp->location);
26120   } else {
26121     stop_before = SeqLocStop (sfp->location);
26122   }
26123   ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
26124   if (strand == Seq_strand_minus) {
26125     stop_after = SeqLocStart (sfp->location);
26126   } else {
26127     stop_after = SeqLocStop (sfp->location);
26128   }
26129   if (stop_before == stop_after)
26130   {
26131     return FALSE;
26132   } else {
26133     return TRUE;
26134   }
26135 }
26136 
26137 
/* Moves the 5' end of an interval to pos: the "to" coordinate for a
 * minus-strand interval, the "from" coordinate otherwise.
 * Returns the signed distance the end moved outward (pos - old to, or
 * old from - pos); 0 when sint is NULL.
 */
static Int4 ExtendSeqInt5ToPos (SeqIntPtr sint, Int4 pos)
{
  Int4 moved_by;

  if (sint == NULL) {
    return 0;
  }

  if (sint->strand == Seq_strand_minus) {
    moved_by = pos - sint->to;
    sint->to = pos;
    return moved_by;
  }

  moved_by = sint->from - pos;
  sint->from = pos;
  return moved_by;
}
26156 
26157 
/* Moves the 3' end of an interval to pos: the "from" coordinate for a
 * minus-strand interval, the "to" coordinate otherwise.
 * Returns the signed distance the end moved outward (old from - pos, or
 * pos - old to); 0 when sint is NULL.
 */
static Int4 ExtendSeqInt3ToPos (SeqIntPtr sint, Int4 pos)
{
  Int4 moved_by;

  if (sint == NULL) {
    return 0;
  }

  if (sint->strand == Seq_strand_minus) {
    moved_by = sint->from - pos;
    sint->from = pos;
    return moved_by;
  }

  moved_by = pos - sint->to;
  sint->to = pos;
  return moved_by;
}
26176 
26177 
/* Moves one end of a location to pos.
 * For a single interval the 5' end (end5 TRUE) or 3' end (end5 FALSE) is
 * moved directly.  For a mix or packed-int location the first member is
 * used for the 5' end and the last member for the 3' end.  Other location
 * types are left untouched.
 * Returns the distance reported for the interval that was moved, or 0 when
 * nothing was moved.
 */
static Int4 ExtendSeqLocToPos (SeqLocPtr slp, Int4 pos, Boolean end5)
{
  SeqLocPtr member;

  if (slp == NULL) return 0;

  if (slp->choice == SEQLOC_INT) {
    if (end5) {
      return ExtendSeqInt5ToPos (slp->data.ptrvalue, pos);
    }
    return ExtendSeqInt3ToPos (slp->data.ptrvalue, pos);
  }

  if (slp->choice != SEQLOC_MIX && slp->choice != SEQLOC_PACKED_INT) {
    return 0;
  }

  if (end5) {
    /* take the first one */
    return ExtendSeqLocToPos (slp->data.ptrvalue, pos, end5);
  }

  /* take the last one */
  member = slp->data.ptrvalue;
  while (member != NULL && member->next != NULL) {
    member = member->next;
  }
  if (member == NULL) {
    return 0;
  }
  return ExtendSeqLocToPos (member, pos, end5);
}
26212 
26213 
s_StrandsMatch(Uint1 strand1,Uint1 strand2)26214 static Boolean s_StrandsMatch (Uint1 strand1, Uint1 strand2)
26215 {
26216   Boolean rval = FALSE;
26217 
26218   if (strand1 == Seq_strand_minus) {
26219     if (strand2 == Seq_strand_minus) {
26220       rval = TRUE;
26221     }
26222   } else {
26223     if (strand2 != Seq_strand_minus) {
26224       rval = TRUE;
26225     }
26226   }
26227   return rval;
26228 }
26229 
26230 
FindPosBeforeFeat(SeqFeatPtr sfp,ExtendToFeaturePtr efp)26231 static Int4 FindPosBeforeFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp)
26232 {
26233   BioseqPtr bsp;
26234   Int4      featdef, start, stop, tmp, pos = -1;
26235   SeqMgrFeatContext context;
26236   SeqFeatPtr  regulator = NULL, candidate;
26237   Uint1       strand;
26238 
26239   if (sfp == NULL || efp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
26240   {
26241     return -1;
26242   }
26243 
26244   featdef = GetFeatdefFromFeatureType (efp->type);
26245   start = SeqLocStart (sfp->location);
26246   stop = SeqLocStop (sfp->location);
26247   if (stop < start) {
26248     tmp = stop;
26249     stop = start;
26250     start = tmp;
26251   }
26252   strand = SeqLocStrand (sfp->location);
26253 
26254   candidate = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
26255   /* take last match before feature */
26256   while (candidate != NULL && (context.right < start || (efp->include_feat && context.left < start)))
26257   {
26258     if (s_StrandsMatch(strand, context.strand) && DoesValueMatchQuantityConstraint (start - context.right, efp->distance)) {
26259       regulator = candidate;
26260       if (efp->include_feat)
26261       {
26262         pos = context.left;
26263       }
26264       else
26265       {
26266         pos = context.right + 1;
26267       }
26268     }
26269     candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
26270   }
26271   return pos;
26272 }
26273 
26274 
FindPosAfterFeat(SeqFeatPtr sfp,ExtendToFeaturePtr efp)26275 static Int4 FindPosAfterFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp)
26276 {
26277   BioseqPtr bsp;
26278   Int4      featdef, start, stop, tmp, pos = -1;
26279   SeqMgrFeatContext context;
26280   SeqFeatPtr  regulator = NULL, candidate;
26281   Uint1       strand;
26282 
26283   if (sfp == NULL || efp == NULL || (bsp = BioseqFindFromSeqLoc (sfp->location)) == NULL)
26284   {
26285     return -1;
26286   }
26287 
26288   featdef = GetFeatdefFromFeatureType (efp->type);
26289   start = SeqLocStart (sfp->location);
26290   stop = SeqLocStop (sfp->location);
26291   if (stop < start) {
26292     tmp = stop;
26293     stop = start;
26294     start = tmp;
26295   }
26296   strand = SeqLocStrand (sfp->location);
26297 
26298   candidate = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
26299   /* take first match after feature */
26300   while (candidate != NULL && (context.left < stop || (efp->include_feat && context.right < stop) || !s_StrandsMatch(strand, context.strand)))
26301   {
26302     candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
26303   }
26304 
26305   while (candidate != NULL && regulator == NULL)
26306   {
26307     if (DoesValueMatchQuantityConstraint (context.left - stop, efp->distance) && s_StrandsMatch(strand, context.strand)) {
26308       regulator = candidate;
26309       if (efp->include_feat)
26310       {
26311         pos = context.right;
26312       }
26313       else
26314       {
26315         pos = context.left - 1;
26316       }
26317     }
26318     else
26319     {
26320       candidate = SeqMgrGetNextFeature (bsp, candidate, 0, featdef, &context);
26321     }
26322   }
26323 
26324   return pos;
26325 }
26326 
26327 
/* Extends one end of sfp's location up to a neighbouring feature described
 * by efp.  end5 selects the 5' end (TRUE) or the 3' end (FALSE).
 *
 * Returns TRUE when a target position was found and ExtendSeqLocToPos
 * reported a positive change, FALSE otherwise.  For a 5'-partial coding
 * region extended at its 5' end, the reading frame is shifted by the number
 * of positions added.
 */
static Boolean ExtendSeqFeatToFeat (SeqFeatPtr sfp, ExtendToFeaturePtr efp, Boolean end5)
{
  Int4        target = -1;
  Int4        shift;
  Int4        base_frame;
  Uint2       strand;
  CdRegionPtr cds;
  Boolean     is_partial5, is_partial3;
  Boolean     is_minus;

  if (sfp == NULL)
  {
    return FALSE;
  }

  strand = SeqLocStrand (sfp->location);
  is_minus = (Boolean) (strand == Seq_strand_minus);

  /* the 5' end of a minus-strand feature and the 3' end of any other
   * feature are extended toward a following feature; the remaining two
   * combinations are extended toward a preceding one
   */
  if ((end5 && is_minus) || (!end5 && !is_minus))
  {
    target = FindPosAfterFeat (sfp, efp);
  }
  else
  {
    target = FindPosBeforeFeat (sfp, efp);
  }

  if (target <= -1)
  {
    return FALSE;
  }
  shift = ExtendSeqLocToPos (sfp->location, target, end5);
  if (shift <= 0)
  {
    return FALSE;
  }

  if (end5 && sfp->data.choice == SEQFEAT_CDREGION)
  {
    CheckSeqLocForPartial (sfp->location, &is_partial5, &is_partial3);
    if (is_partial5)
    {
      cds = (CdRegionPtr) sfp->data.value.ptrvalue;
      if (cds != NULL)
      {
        /* an unset frame (0) is treated as frame 1 */
        base_frame = (cds->frame == 0) ? 1 : cds->frame;
        cds->frame = (base_frame + shift - 1) % 3 + 1;
      }
    }
  }
  return TRUE;
}
26386 
26387 
ApplyLocationEditTypeToSeqFeat(ValNodePtr action,SeqFeatPtr sfp)26388 static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp)
26389 {
26390   Boolean rval = FALSE;
26391 
26392   if (action == NULL || sfp == NULL) {
26393     return FALSE;
26394   }
26395 
26396   switch (action->choice) {
26397     case LocationEditType_strand:
26398       rval = ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp);
26399       break;
26400     case LocationEditType_set_5_partial:
26401       rval = ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp);
26402       break;
26403     case LocationEditType_clear_5_partial:
26404       rval = ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp);
26405       break;
26406     case LocationEditType_set_3_partial:
26407       rval = ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp);
26408       break;
26409     case LocationEditType_clear_3_partial:
26410       rval = ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp);
26411       break;
26412     case LocationEditType_set_both_partial:
26413       rval = ApplyPartialBothSetActionToSeqFeat (action->data.ptrvalue, sfp);
26414       break;
26415     case LocationEditType_clear_both_partial:
26416       rval = ApplyClearBothPartialToSeqFeat (action->data.intvalue, sfp);
26417       break;
26418     case LocationEditType_convert:
26419       rval = ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp);
26420       break;
26421     case LocationEditType_extend_5:
26422       rval = ExtendSeqFeat5 (sfp);
26423       break;
26424     case LocationEditType_extend_3:
26425       rval = ExtendSeqFeat3 (sfp);
26426       break;
26427     case LocationEditType_extend_5_to_feat:
26428       rval = ExtendSeqFeatToFeat (sfp, action->data.ptrvalue, TRUE);
26429       break;
26430     case LocationEditType_extend_3_to_feat:
26431       rval = ExtendSeqFeatToFeat (sfp, action->data.ptrvalue, FALSE);
26432       break;
26433   }
26434   return rval;
26435 }
26436 
26437 
ApplyEditFeatureLocationActionToSeqEntry(EditFeatureLocationActionPtr action,SeqEntryPtr sep,FILE * log_fp)26438 static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep, FILE *log_fp)
26439 {
26440   ConvertAndRemoveFeatureCollectionData d;
26441   ValNodePtr vnp;
26442   SeqFeatPtr sfp, gene;
26443   Int4       num_affected = 0;
26444   /* variables for logging */
26445   CharPtr    old_loc = NULL, new_loc;
26446   Boolean    retranslated, adjusted_gene;
26447 
26448   if (action == NULL) return 0;
26449 
26450   d.featdef = GetFeatdefFromFeatureType (action->type);
26451   d.constraint_set = action->constraint;
26452   d.feature_list = NULL;
26453 
26454   VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
26455   for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
26456     sfp = vnp->data.ptrvalue;
26457     if (sfp != NULL) {
26458       if (log_fp != NULL) {
26459         old_loc = SeqLocPrintUseBestID (sfp->location);
26460       }
26461       if (sfp->data.choice != SEQFEAT_GENE && action->also_edit_gene) {
26462         gene = GetGeneForFeature (sfp);
26463       } else {
26464         gene = NULL;
26465       }
26466       if (ApplyLocationEditTypeToSeqFeat (action->action, sfp)) {
26467         adjusted_gene = FALSE;
26468         if (gene != NULL && ApplyLocationEditTypeToSeqFeat (action->action, gene)) {
26469           adjusted_gene = TRUE;
26470         }
26471         retranslated = FALSE;
26472         if (sfp->data.choice == SEQFEAT_CDREGION && action->retranslate_cds) {
26473           SeqMgrIndexFeatures(sfp->idx.entityID, NULL);
26474           retranslated = RetranslateOneCDS (sfp, sfp->idx.entityID, TRUE, TRUE);
26475         }
26476         num_affected++;
26477         if (log_fp != NULL) {
26478           new_loc = SeqLocPrintUseBestID (sfp->location);
26479           fprintf (log_fp, "Changed location %s to %s%s%s\n", old_loc, new_loc,
26480                            retranslated ? " and retranslated protein" : "",
26481                            adjusted_gene ? " and adjusted gene location" : "");
26482           new_loc = MemFree (new_loc);
26483         }
26484       }
26485       old_loc = MemFree (old_loc);
26486     }
26487   }
26488   return num_affected;
26489 }
26490 
26491 
/* Callback data for ApplyMolinfoBlockCallback. */
typedef struct molinfoblocklog {
  MolinfoBlockPtr mib;    /* block whose constraint, from_list and to_list are applied */
  FILE *log_fp;           /* optional log; NULL suppresses logging */
  Boolean any_change;     /* set to TRUE by the callback when a value was changed */
} MolInfoBlockLogData, PNTR MolInfoBlockLogPtr;
26497 
/* Bioseq visitor: applies the "to" values of a MolinfoBlock to bsp.
 *
 * data is a MolInfoBlockLogPtr.  Nothing happens unless bsp satisfies the
 * block's constraint and every entry of the block's from_list equals the
 * current value on the bioseq (or its MolInfo).  Each to_list entry that
 * SetSequenceQualOnBioseq reports as changed is logged (when a log file is
 * present) and recorded in any_change.
 */
static void ApplyMolinfoBlockCallback (BioseqPtr bsp, Pointer data)
{
  MolInfoBlockLogPtr log_data = (MolInfoBlockLogPtr) data;
  MolinfoBlockPtr    block;
  ValNodePtr         entry;
  MolInfoPtr         molinfo;
  Char               id_text[100];
  CharPtr            qual_name;

  if (bsp == NULL || log_data == NULL || log_data->mib == NULL) {
    return;
  }
  block = log_data->mib;

  if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, block->constraint)) {
    return;
  }

  molinfo = GetMolInfoForBioseq (bsp);

  /* every "from" value must match the current state, otherwise leave bsp alone */
  entry = block->from_list;
  while (entry != NULL) {
    if (entry->choice == MolinfoField_molecule) {
      if (molinfo == NULL || molinfo->biomol != BiomolFromMoleculeType (entry->data.intvalue)) {
        return;
      }
    } else if (entry->choice == MolinfoField_technique) {
      if (molinfo == NULL || molinfo->tech != TechFromTechniqueType (entry->data.intvalue)) {
        return;
      }
    } else if (entry->choice == MolinfoField_completedness) {
      if (molinfo == NULL || molinfo->completeness != CompletenessFromCompletednessType (entry->data.intvalue)) {
        return;
      }
    } else if (entry->choice == MolinfoField_mol_class) {
      if (bsp->mol != MolFromMoleculeClassType (entry->data.intvalue)) {
        return;
      }
    } else if (entry->choice == MolinfoField_topology) {
      if (bsp->topology != TopologyFromTopologyType (entry->data.intvalue)) {
        return;
      }
    } else if (entry->choice == MolinfoField_strand) {
      if (bsp->strand != StrandFromStrandType (entry->data.intvalue)) {
        return;
      }
    }
    entry = entry->next;
  }

  /* apply each "to" value, logging the ones that actually changed something */
  for (entry = block->to_list; entry != NULL; entry = entry->next) {
    if (!SetSequenceQualOnBioseq (bsp, entry)) {
      continue;
    }
    if (log_data->log_fp != NULL) {
      SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_text, PRINTID_REPORT, sizeof (id_text) - 1);
      qual_name = GetSequenceQualName (entry);
      fprintf (log_data->log_fp, "Changed to %s for %s\n", qual_name, id_text);
      qual_name = MemFree (qual_name);
    }
    log_data->any_change = TRUE;
  }
}
26571 
26572 
ApplyMolinfoBlockToSeqEntryEx(SeqEntryPtr sep,MolinfoBlockPtr mib,FILE * log_fp)26573 static Boolean ApplyMolinfoBlockToSeqEntryEx (SeqEntryPtr sep, MolinfoBlockPtr mib, FILE *log_fp)
26574 {
26575   MolInfoBlockLogData md;
26576 
26577   md.any_change = FALSE;
26578   md.log_fp = log_fp;
26579   md.mib = mib;
26580 
26581   VisitBioseqsInSep (sep, &md, ApplyMolinfoBlockCallback);
26582   return md.any_change;
26583 }
26584 
26585 
ApplyMolinfoBlockToSeqEntry(SeqEntryPtr sep,MolinfoBlockPtr mib)26586 NLM_EXTERN void ApplyMolinfoBlockToSeqEntry (SeqEntryPtr sep, MolinfoBlockPtr mib)
26587 {
26588   ApplyMolinfoBlockToSeqEntryEx (sep, mib, NULL);
26589 }
26590 
26591 
26592 static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp);
26593 static Boolean FixAuthorNamesCaps (FixAuthorCapsPtr action, SeqEntryPtr sep, FILE *log_fp);
26594 
ApplyFixCapsToSeqEntry(SeqEntryPtr sep,FixCapsActionPtr action,FILE * log_fp)26595 static Boolean ApplyFixCapsToSeqEntry (SeqEntryPtr sep, FixCapsActionPtr action, FILE *log_fp)
26596 {
26597   Boolean any_change = FALSE;
26598 
26599   if (sep == NULL || action == NULL) {
26600     return FALSE;
26601   }
26602 
26603   switch (action->choice) {
26604     case FixCapsAction_pub:
26605       any_change = ApplyFixPubCapsToSeqEntry (action->data.ptrvalue, sep, log_fp);
26606       break;
26607     case FixCapsAction_src_country:
26608       any_change = FixupCountryQualsWithLog (sep, FALSE, log_fp);
26609       break;
26610     case FixCapsAction_mouse_strain:
26611       any_change = FixupMouseStrains (sep, log_fp);
26612       break;
26613     case FixCapsAction_src_qual:
26614       any_change = FixSrcQualCaps (sep, action->data.intvalue, log_fp);
26615       break;
26616     case FixCapsAction_author:
26617       any_change = FixAuthorNamesCaps (action->data.ptrvalue, sep, log_fp);
26618       break;
26619   }
26620 
26621   return any_change;
26622 }
26623 
26624 
/* BioSource visitor: rewrites each collection-date subsource with the value
 * produced by ReformatDateWithMonthNames when that value exists and differs
 * from the current text.  data is an optional LogInfoPtr; each replacement
 * is written to its file (if any) and flags data_in_log.
 */
static void FixCollectionDatesCallback (BioSourcePtr biop, Pointer data)
{
  LogInfoPtr   log_info = (LogInfoPtr) data;
  SubSourcePtr qual;
  CharPtr      reformatted;

  if (biop == NULL) {
    return;
  }

  for (qual = biop->subtype; qual != NULL; qual = qual->next) {
    if (qual->subtype != SUBSRC_collection_date) {
      continue;
    }
    reformatted = ReformatDateWithMonthNames (qual->name);
    if (reformatted == NULL || StringCmp (reformatted, qual->name) == 0) {
      /* nothing to change; discard the unused copy */
      reformatted = MemFree (reformatted);
      continue;
    }
    if (log_info != NULL) {
      if (log_info->fp != NULL) {
        fprintf (log_info->fp, "Changed '%s' to '%s'\n", qual->name, reformatted);
      }
      log_info->data_in_log = TRUE;
    }
    /* the subsource takes ownership of the reformatted string */
    qual->name = MemFree (qual->name);
    qual->name = reformatted;
  }
}
26655 
26656 
FindBadLatLon(BioSourcePtr biop)26657 NLM_EXTERN SubSourcePtr FindBadLatLon (BioSourcePtr biop)
26658 {
26659   SubSourcePtr ssp, ssp_bad = NULL;
26660   Boolean      format_ok, lat_in_range, lon_in_range, precision_ok;
26661 
26662   if (biop == NULL)
26663   {
26664     return NULL;
26665   }
26666 
26667   for (ssp = biop->subtype; ssp != NULL && ssp_bad == NULL; ssp = ssp->next)
26668   {
26669     if (ssp->subtype == SUBSRC_lat_lon)
26670     {
26671       IsCorrectLatLonFormat (ssp->name, &format_ok, &precision_ok, &lat_in_range, &lon_in_range);
26672       if (!format_ok || !lat_in_range || !lon_in_range)
26673       {
26674         ssp_bad = ssp;
26675       }
26676     }
26677   }
26678   return ssp_bad;
26679 }
26680 
26681 
/* Descriptor visitor: appends sdp (tagged OBJ_SEQDESC) to the ValNode list
 * that userdata points to when sdp is a source descriptor for which
 * FindBadLatLon finds a bad lat-lon subsource.
 */
static void FindBadLatLonDesc (SeqDescrPtr sdp, Pointer userdata)
{
  ValNodePtr PNTR bad_list = (ValNodePtr PNTR) userdata;

  if (sdp != NULL && sdp->choice == Seq_descr_source && bad_list != NULL)
  {
    if (FindBadLatLon (sdp->data.ptrvalue) != NULL)
    {
      ValNodeAddPointer (bad_list, OBJ_SEQDESC, sdp);
    }
  }
}
26693 
26694 
/* Feature visitor: appends sfp (tagged OBJ_SEQFEAT) to the ValNode list
 * that userdata points to when sfp is a biosource feature for which
 * FindBadLatLon finds a bad lat-lon subsource.
 */
static void FindBadLatLonFeat (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR bad_list = (ValNodePtr PNTR) userdata;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC && bad_list != NULL)
  {
    if (FindBadLatLon (sfp->data.value.ptrvalue) != NULL)
    {
      ValNodeAddPointer (bad_list, OBJ_SEQFEAT, sfp);
    }
  }
}
26706 
26707 
FindBadLatLonObjects(SeqEntryPtr sep)26708 NLM_EXTERN ValNodePtr FindBadLatLonObjects (SeqEntryPtr sep)
26709 {
26710   ValNodePtr list = NULL;
26711 
26712   VisitDescriptorsInSep (sep, &list, FindBadLatLonDesc);
26713   VisitFeaturesInSep (sep, &list, FindBadLatLonFeat);
26714   return list;
26715 }
26716 
26717 
/* Appends "altitude:<extra_text>" to the first SUBSRC_other subsource of
 * biop, separated from any existing text by "; ".  A new SUBSRC_other
 * subsource is created at the head of the list when none exists.  Does
 * nothing when biop is NULL or extra_text has no text.
 */
static void AddAltitudeToSubSourceNote (BioSourcePtr biop, CharPtr extra_text)
{
  SubSourcePtr note;
  CharPtr      combined;
  CharPtr      note_fmt = "%s%saltitude:%s";

  if (biop == NULL || StringHasNoText (extra_text))
  {
    return;
  }

  /* find the first existing note subsource */
  for (note = biop->subtype; note != NULL; note = note->next)
  {
    if (note->subtype == SUBSRC_other)
    {
      break;
    }
  }
  if (note == NULL)
  {
    note = SubSourceNew ();
    note->subtype = SUBSRC_other;
    note->next = biop->subtype;
    biop->subtype = note;
  }

  /* the format string's own length is used as slack for the separator,
   * the "altitude:" label and the terminator
   */
  combined = (CharPtr) MemNew (sizeof (Char) * (StringLen (note->name)
                                                + StringLen (extra_text)
                                                + StringLen (note_fmt)));
  if (note->name == NULL)
  {
    sprintf (combined, note_fmt, "", "", extra_text);
  }
  else
  {
    sprintf (combined, note_fmt, note->name, "; ", extra_text);
  }
  note->name = MemFree (note->name);
  note->name = combined;
}
26749 
26750 
LatLonAutocorrectList(FILE * fp,ValNodePtr object_list)26751 NLM_EXTERN Boolean LatLonAutocorrectList (FILE *fp, ValNodePtr object_list)
26752 {
26753   ValNodePtr vnp;
26754   SeqDescrPtr sdp;
26755   BioSourcePtr biop;
26756   SubSourcePtr bad_ssp;
26757   CharPtr      fix, extra_text;
26758   Boolean      any_change = FALSE;
26759 
26760   if (object_list == NULL) return FALSE;
26761 
26762   for (vnp = object_list; vnp != NULL; vnp = vnp->next)
26763   {
26764     if (vnp->choice != OBJ_SEQDESC) continue;
26765     sdp = vnp->data.ptrvalue;
26766     if (sdp != NULL && sdp->choice == Seq_descr_source)
26767     {
26768       biop = (BioSourcePtr) sdp->data.ptrvalue;
26769       bad_ssp = FindBadLatLon (biop);
26770       if (bad_ssp != NULL)
26771       {
26772         fix = FixLatLonFormat (bad_ssp->name);
26773         if (fix != NULL)
26774         {
26775           extra_text = StringChr (fix, ',');
26776           if (extra_text != NULL)
26777           {
26778             *extra_text = 0;
26779             extra_text++;
26780             while (isspace (*extra_text))
26781             {
26782               extra_text++;
26783             }
26784           }
26785           if (fp != NULL) {
26786             fprintf (fp, "Corrected %s to %s\n", bad_ssp->name, fix);
26787           }
26788           bad_ssp->name = MemFree (bad_ssp->name);
26789           bad_ssp->name = fix;
26790           if (extra_text != NULL)
26791           {
26792             AddAltitudeToSubSourceNote (biop, extra_text);
26793             if (fp != NULL) {
26794               fprintf (fp, "Moved %s to subsource note\n", extra_text);
26795             }
26796           }
26797           any_change = TRUE;
26798         }
26799         else
26800         {
26801           if (fp != NULL) {
26802             fprintf (fp, "Unable to correct %s\n", bad_ssp->name);
26803           }
26804         }
26805       }
26806     }
26807   }
26808   return any_change;
26809 }
26810 
26811 
/* Rewrites *seq so that every 'i' or 'I' is written as "<i>".  A '<' is
 * added when the character is not already preceded by one and a '>' when it
 * is not already followed by one.  The string is replaced only when at least
 * one bracket has to be added; the change is then reported through lip
 * (if not NULL).
 */
static void ReplaceiInSeq (CharPtr PNTR seq, LogInfoPtr lip)
{
  CharPtr hit, rewritten, rd, wr;
  Int4    to_add = 0;

  if (seq == NULL) {
    return;
  }

  /* first pass: count the brackets that are missing */
  for (hit = StringISearch (*seq, "i"); hit != NULL; hit = StringISearch (hit + 1, "i")) {
    if (hit == *seq || hit[-1] != '<') {
      to_add++;
    }
    if (hit[1] != '>') {
      to_add++;
    }
  }
  if (to_add == 0) {
    return;
  }

  /* second pass: copy, inserting the missing brackets */
  rewritten = (CharPtr) MemNew (sizeof (Char) * (StringLen (*seq) + 1 + to_add));
  wr = rewritten;
  for (rd = *seq; *rd != 0; rd++) {
    if (*rd != 'i' && *rd != 'I') {
      *wr++ = *rd;
      continue;
    }
    if (rd == *seq || rd[-1] != '<') {
      *wr++ = '<';
    }
    *wr++ = 'i';
    if (rd[1] != '>') {
      *wr++ = '>';
    }
  }
  *wr = 0;

  if (lip != NULL) {
    if (lip->fp != NULL) {
      fprintf (lip->fp, "Changed primer sequence from '%s' to '%s'\n", *seq, rewritten);
    }
    lip->data_in_log = TRUE;
  }

  *seq = MemFree (*seq);
  *seq = rewritten;
}
26867 
26868 
FixiPCRPrimerSeqsCallback(BioSourcePtr biop,Pointer data)26869 NLM_EXTERN void FixiPCRPrimerSeqsCallback (BioSourcePtr biop, Pointer data)
26870 {
26871   PCRReactionSetPtr ps;
26872   PCRPrimerPtr p;
26873   LogInfoPtr   lip;
26874 
26875   if (biop == NULL) {
26876     return;
26877   }
26878   lip = (LogInfoPtr) data;
26879 
26880   for (ps = biop->pcr_primers; ps != NULL; ps = ps->next) {
26881     for (p = ps->forward; p != NULL; p = p->next) {
26882       ReplaceiInSeq (&(p->seq), lip);
26883     }
26884     for (p = ps->reverse; p != NULL; p = p->next) {
26885       ReplaceiInSeq (&(p->seq), lip);
26886     }
26887   }
26888 }
26889 
26890 
/* Callback data for FixProteinNameFormatCallback. */
typedef struct fixproteinnameformat {
  Boolean any_change;     /* set to TRUE when a protein name was modified */
  FILE *fp;               /* optional log; NULL suppresses logging */
  ValNodePtr orgnames;    /* list of strings that are searched for in protein names */
} FixProteinNameFormatData, PNTR FixProteinNameFormatPtr;
26896 
26897 
/* Feature visitor: removes organism names from the names of a protein
 * feature.
 *
 * data is a FixProteinNameFormatPtr.  For every string in f->orgnames and
 * every name of the protein, the first case-insensitive occurrence of the
 * string is removed, together with directly enclosing () or [] if present.
 * An occurrence at the end of the name also drops the whitespace character
 * before it; otherwise one following whitespace character is dropped.
 * Each removal sets f->any_change and is logged to f->fp when not NULL.
 */
static void FixProteinNameFormatCallback (SeqFeatPtr sfp, Pointer data)
{
  FixProteinNameFormatPtr f;
  ProtRefPtr prp;
  ValNodePtr vnp_n, vnp_p;
  CharPtr name_start, cp, src, dst;
  Int4    len;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL
      || (f = (FixProteinNameFormatPtr) data) == NULL) {
    return;
  }

  for (vnp_n = f->orgnames; vnp_n != NULL; vnp_n = vnp_n->next) {
    for (vnp_p = prp->name; vnp_p != NULL; vnp_p = vnp_p->next) {
      name_start = (CharPtr) vnp_p->data.ptrvalue;
      cp = StringISearch (name_start, vnp_n->data.ptrvalue);
      if (cp == NULL) {
        continue;
      }
      len = StringLen (vnp_n->data.ptrvalue);
      /* widen the span to cover directly enclosing brackets */
      if (cp != name_start
          && ((*(cp - 1) == '(' && *(cp + len) == ')') || (*(cp - 1) == '[' && *(cp + len) == ']'))) {
        cp--;
        len += 2;
      }
      /* cp > name_start guards the look-behind: without it a match at the
       * very start of the name would read (and possibly write) the byte
       * before the buffer
       */
      if (*(cp + len) == 0 && cp != name_start && isspace ((unsigned char) *(cp - 1))) {
        *(cp - 1) = 0;
      } else {
        if (isspace ((unsigned char) *(cp + len))) {
          len ++;
        }
        /* source and destination overlap, so shift the tail down one byte
         * at a time instead of using a string copy routine
         */
        for (dst = cp, src = cp + len; (*dst = *src) != 0; dst++, src++) {
        }
      }
      f->any_change = TRUE;
      if (f->fp != NULL) {
        fprintf (f->fp, "Removed '%s' from protein name (now '%s')\n", (CharPtr) vnp_n->data.ptrvalue, (CharPtr) vnp_p->data.ptrvalue);
      }
    }
  }
}
26940 
26941 
ApplyFixFormatToSeqEntry(SeqEntryPtr sep,FixFormatActionPtr action,FILE * log_fp)26942 static Boolean ApplyFixFormatToSeqEntry (SeqEntryPtr sep, FixFormatActionPtr action, FILE *log_fp)
26943 {
26944   LogInfoData lid;
26945   FixProteinNameFormatData protformat;
26946   ValNodePtr  list;
26947 
26948 
26949   if (sep == NULL || action == NULL) {
26950     return FALSE;
26951   }
26952 
26953   MemSet (&lid, 0, sizeof (LogInfoData));
26954   lid.fp = log_fp;
26955 
26956   switch (action->choice) {
26957     case FixFormatAction_collection_date:
26958       VisitBioSourcesInSep (sep, &lid, FixCollectionDatesCallback);
26959       break;
26960     case FixFormatAction_lat_lon:
26961       list = FindBadLatLonObjects (sep);
26962       lid.data_in_log = LatLonAutocorrectList (lid.fp, list);
26963       list = FreeObjectList (list);
26964       break;
26965     case FixFormatAction_primers:
26966       VisitBioSourcesInSep (sep, &lid, FixiPCRPrimerSeqsCallback);
26967       break;
26968     case FixFormatAction_protein_name:
26969       MemSet (&protformat, 0, sizeof (FixProteinNameFormatData));
26970       protformat.fp = log_fp;
26971       VisitBioSourcesInSep (sep, &(protformat.orgnames), GetOrgNamesInRecordCallback);
26972       VisitFeaturesInSep (sep, &protformat, FixProteinNameFormatCallback);
26973       protformat.orgnames = ValNodeFree (protformat.orgnames);
26974       lid.data_in_log = protformat.any_change;
26975       break;
26976   }
26977   return lid.data_in_log;
26978 }
26979 
26980 
/* One find/replace pair; used for the entries of macro_spell_fixes. */
typedef struct replacepair {
  CharPtr find;       /* text to search for */
  CharPtr replace;    /* text that replaces it */
} ReplacePairData, PNTR ReplacePairPtr;
26985 
26986 static ReplacePairData macro_spell_fixes[] = {
26987   {"Agricultrual", "Agricultural"},
26988   {"Agricultureal", "Agricultural"},
26989   {"Agricultrure", "Agriculture"},
26990   {"bioremidiation", "bioremediation"},
26991   {"Colledge", "College"},
26992   {"Insitiute", "Institute" },
26993   {"Instutite", "Institute" },
26994   {"instute", "Institute" },
26995   {"institue", "Institute" },
26996   {"insitute", "Institute" },
26997   {"insititute","Institute" },
26998   {"Instiute","Institute" },
26999   {"hpothetical", "hypothetical" },
27000   {"hyphotetical", "hypothetical" },
27001   {"hyphotheical", "hypothetical" },
27002   {"hypotehtical", "hypothetical" },
27003   {"hypotethical", "hypothetical" },
27004   {"hypotetical", "hypothetical" },
27005   {"hypotheical", "hypothetical" },
27006   {"hypotheitcal", "hypothetical" },
27007   {"hypothetcial", "hypothetical" },
27008   {"hypothetica", "hypothetical" },
27009   {"hypothteical", "hypothetical" },
27010   {"hypothtical", "hypothetical" },
27011   {"hypthetical", "hypothetical" },
27012   {"hyptothetical", "hypothetical" },
27013   {"idendification", "identification" },
27014   {"protien", "protein" },
27015   {"puatative", "putative" },
27016   {"puative", "putative" },
27017   {"puative", "putative" },
27018   {"putaitive", "putative" },
27019   {"putaitve", "putative" },
27020   {"putaive", "putative" },
27021   {"putataive", "putative" },
27022   {"putatitve", "putative" },
27023   {"putitive", "putative" },
27024   {"reseach", "research"},
27025   {"sequene", "sequence"},
27026   {"univeristy", "University" },
27027   {"univerisity", "University" },
27028   {"univercity", "University" },
27029   {"uiniversity", "University" },
27030   {"uinversity", "University" },
27031   {"univesity", "University" },
27032   {"uviversity", "University" },
27033   {"universtiy", "University" },
27034   {"unvierstity", "University" },
27035   {"univiersity", "University" },
27036   {"universtity", "University" },
27037   {"Unversity", "University" },
27038   {"Univresity", "University" },
27039   {NULL, NULL}};
27040 
27041 
/* Change-notification callback: sets the Boolean that userdata points to
 * (if any) to TRUE.  The entity and item arguments are not used.
 */
static void SetFlagWhenChanged (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Pointer userdata)
{
  BoolPtr changed = (BoolPtr) userdata;

  if (changed == NULL) {
    return;
  }
  *changed = TRUE;
}
27050 
27051 
SpellFixSeqEntry(SeqEntryPtr sep,Pointer data,FILE * log_fp)27052 static Boolean SpellFixSeqEntry (SeqEntryPtr sep, Pointer data, FILE *log_fp)
27053 {
27054   Boolean any_changes = FALSE, this_change;
27055   Uint2   entityID;
27056   Int4    i;
27057 
27058   entityID = ObjMgrGetEntityIDForChoice (sep);
27059   for (i = 0; macro_spell_fixes[i].find != NULL; i++) {
27060     this_change = FALSE;
27061     FindReplaceInEntity (entityID, macro_spell_fixes[i].find, macro_spell_fixes[i].replace, FALSE, TRUE, TRUE,
27062                          FALSE, 0, NULL, NULL, NULL, FALSE, SetFlagWhenChanged, &this_change);
27063     if (this_change) {
27064       if (log_fp != NULL) {
27065         fprintf (log_fp, "Replaced '%s' with '%s'\n", macro_spell_fixes[i].find, macro_spell_fixes[i].replace);
27066       }
27067       any_changes = TRUE;
27068     }
27069   }
27070   return any_changes;
27071 }
27072 
27073 
/* One row of the descriptortypename lookup table. */
typedef struct descriptortypename {
  Int4 descriptortype;        /* Descriptor_type_* value */
  Uint1 descriptor_choice;    /* corresponding Seq_descr_* choice (0 for "Any") */
  CharPtr descriptorname;     /* display name */
} DescriptorTypeNameData, PNTR DescriptorTypeNamePtr;
27079 
/* Maps Descriptor_type_* values to Seq_descr_* choices and display names.
 * Seq_descr_user appears in three rows (User, StructuredComment,
 * GenomeProjectID), so a lookup by choice that stops at the first hit
 * yields Descriptor_type_user.
 */
static DescriptorTypeNameData descriptortypename[] = {
 { Descriptor_type_all , 0 , "Any" } ,
 { Descriptor_type_title , Seq_descr_title , "Title" } ,
 { Descriptor_type_source , Seq_descr_source , "Source" } ,
 { Descriptor_type_publication , Seq_descr_pub , "Publication" } ,
 { Descriptor_type_comment , Seq_descr_comment , "Comment" } ,
 { Descriptor_type_genbank , Seq_descr_genbank , "GenBank" } ,
 { Descriptor_type_user , Seq_descr_user , "User" } ,
 { Descriptor_type_create_date , Seq_descr_create_date , "CreateDate" } ,
 { Descriptor_type_update_date , Seq_descr_update_date , "UpdateDate" } ,
 { Descriptor_type_mol_info , Seq_descr_molinfo , "MolInfo" } ,
 { Descriptor_type_structured_comment , Seq_descr_user , "StructuredComment" } ,
 { Descriptor_type_genome_project_id , Seq_descr_user , "GenomeProjectID" }
};
27094 
/* Number of rows in descriptortypename.  The expansion is parenthesized so
 * that it behaves as a single value inside larger expressions.
 */
#define NUM_descriptortypename (sizeof (descriptortypename) / sizeof (descriptortypename[0]))
27096 
GetDescriptorTypeFromDescriptorChoice(Uint1 descriptor_choice)27097 static Int4 GetDescriptorTypeFromDescriptorChoice (Uint1 descriptor_choice)
27098 {
27099   Int4 i;
27100 
27101   for (i = 0; i < NUM_descriptortypename; i++) {
27102     if (descriptor_choice == descriptortypename[i].descriptor_choice) {
27103       return descriptortypename[i].descriptortype;
27104     }
27105   }
27106   return -1;
27107 }
27108 
27109 
GetDescriptorChoiceFromDescriptorType(Int4 descriptortype)27110 static Uint1 GetDescriptorChoiceFromDescriptorType (Int4 descriptortype)
27111 {
27112   Int4 i;
27113 
27114   for (i = 0; i < NUM_descriptortypename; i++) {
27115     if (descriptortype == descriptortypename[i].descriptortype) {
27116       return descriptortypename[i].descriptor_choice;
27117     }
27118   }
27119   return SEQDESCR_MAX;
27120 }
27121 
27122 
GetDescriptorNameFromDescriptorType(Int4 descriptortype)27123 NLM_EXTERN CharPtr GetDescriptorNameFromDescriptorType (Int4 descriptortype)
27124 {
27125   CharPtr str = NULL;
27126   Int4 i;
27127 
27128   for (i = 0; i < NUM_descriptortypename && str == NULL; i++) {
27129     if (descriptortype == descriptortypename[i].descriptortype) {
27130       str = descriptortypename[descriptortype].descriptorname;
27131     }
27132   }
27133   if (str == NULL) {
27134     str = "Unknown descriptor type";
27135   }
27136   return str;
27137 }
27138 
27139 
AddAllDescriptorsToChoiceList(ValNodePtr PNTR descriptor_type_list)27140 NLM_EXTERN void AddAllDescriptorsToChoiceList (ValNodePtr PNTR descriptor_type_list)
27141 {
27142   Int4 i;
27143   ValNodePtr tmp_list = NULL;
27144 
27145   for (i = 0; i < NUM_descriptortypename; i++) {
27146     ValNodeAddPointer (&tmp_list, descriptortypename[i].descriptortype, StringSave (descriptortypename[i].descriptorname));
27147   }
27148   tmp_list = ValNodeSort (tmp_list, SortVnpByString);
27149   ValNodeLink (descriptor_type_list, tmp_list);
27150 }
27151 
27152 
27153 
DoesDescriptorMatchType(SeqDescrPtr sdp,Int4 descriptortype)27154 static Boolean DoesDescriptorMatchType (SeqDescrPtr sdp, Int4 descriptortype)
27155 {
27156   Uint1 descriptorchoice;
27157   UserObjectPtr uop;
27158 
27159   if (sdp == NULL) {
27160     return FALSE;
27161   } else if (descriptortype == Descriptor_type_all) {
27162     return TRUE;
27163   } else if ((descriptorchoice = GetDescriptorChoiceFromDescriptorType (descriptortype)) == SEQDESCR_MAX) {
27164     return FALSE;
27165   } else if (descriptorchoice != sdp->choice) {
27166     return FALSE;
27167   } else if (descriptortype == Descriptor_type_structured_comment) {
27168     if (sdp->choice == Seq_descr_user
27169         && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
27170             || uop->type == NULL
27171             || StringCmp (uop->type->str, "StructuredComment") != 0)) {
27172       return FALSE;
27173     } else {
27174       return TRUE;
27175     }
27176   } else if (descriptortype == Descriptor_type_genome_project_id) {
27177     if (sdp->choice == Seq_descr_user
27178         && ((uop = (UserObjectPtr) sdp->data.ptrvalue) == NULL
27179             || uop->type == NULL
27180             || StringCmp (uop->type->str, "GenomeProjectsDB") != 0)) {
27181       return FALSE;
27182     } else {
27183       return TRUE;
27184     }
27185 
27186   } else {
27187     return TRUE;
27188   }
27189 }
27190 
27191 
/* Callback data for RemoveDescriptorCollectionCallback. */
typedef struct removedescriptoractioncollection {
  RemoveDescriptorActionPtr action;   /* supplies the descriptor type and constraint to match */
  ValNodePtr obj_list;                /* matching descriptors, added as OBJ_SEQDESC nodes */
} RemoveDescriptorActionCollectionData, PNTR RemoveDescriptorActionCollectionPtr;
27196 
27197 
/* Descriptor visitor: adds sdp to the collection's obj_list when it matches
 * both the type and the constraint of the collection's action.  data is a
 * RemoveDescriptorActionCollectionPtr.
 */
static void RemoveDescriptorCollectionCallback (SeqDescrPtr sdp, Pointer data)
{
  RemoveDescriptorActionCollectionPtr collection;

  if (sdp == NULL) {
    return;
  }
  collection = (RemoveDescriptorActionCollectionPtr) data;
  if (collection == NULL || collection->action == NULL) {
    return;
  }

  if (!DoesDescriptorMatchType (sdp, collection->action->type)) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, collection->action->constraint)) {
    return;
  }
  ValNodeAddPointer (&(collection->obj_list), OBJ_SEQDESC, sdp);
}
27212 
27213 
ApplyRemoveDescriptorActionToSeqEntry(RemoveDescriptorActionPtr action,SeqEntryPtr sep)27214 static Int4 ApplyRemoveDescriptorActionToSeqEntry (RemoveDescriptorActionPtr action, SeqEntryPtr sep)
27215 {
27216   RemoveDescriptorActionCollectionData d;
27217   SeqDescrPtr sdp;
27218   ObjValNodePtr ovp;
27219   ValNodePtr vnp;
27220   Int4       num_deleted = 0;
27221 
27222   if (action == NULL) return 0;
27223 
27224   d.action = action;
27225   d.obj_list = NULL;
27226 
27227   VisitDescriptorsInSep (sep, &d, RemoveDescriptorCollectionCallback);
27228   if (d.obj_list == NULL) {
27229     return 0;
27230   }
27231   for (vnp = d.obj_list; vnp != NULL; vnp = vnp->next) {
27232     sdp = vnp->data.ptrvalue;
27233     if (sdp != NULL && sdp->extended != 0) {
27234       ovp = (ObjValNodePtr) sdp;
27235       ovp->idx.deleteme = TRUE;
27236       num_deleted ++;
27237     }
27238   }
27239   DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
27240   return num_deleted;
27241 }
27242 
27243 
/* Feature visitor: for a coding region that has a product and is not
 * 3' partial, removes a trailing '*' from the product sequence.
 *
 * When a residue is removed, the product Bioseq length is decremented and
 * every single-interval feature on the product whose end now lies past the
 * new last position is clipped to it.
 *
 * data may point to a LogInfoData; if so, data_in_log is set and, when a
 * log file is present, one line naming the trimmed protein is written.
 */
static void TrimStopsFromCompleteCodingRegionsCallback (SeqFeatPtr sfp, Pointer data)
{
  Boolean           partial5, partial3;
  Boolean           has_trailing_stop;
  BioseqPtr         product_bsp;
  CharPtr           residues;
  Int4              num_residues;
  Int4              last_pos;
  SeqFeatPtr        feat;
  SeqMgrFeatContext context;
  SeqIntPtr         interval;
  LogInfoPtr        log_info = (LogInfoPtr) data;
  Char              id_txt[100];

  if (sfp == NULL) {
    return;
  }
  if (sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) {
    return;
  }

  /* a 3' partial coding region is not expected to end in a stop */
  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
  if (partial3) {
    return;
  }

  product_bsp = BioseqFindFromSeqLoc (sfp->product);
  if (product_bsp == NULL) {
    return;
  }

  /* the text copy is only needed to inspect the final residue */
  residues = GetSequenceByBsp (product_bsp);
  num_residues = 0;
  if (residues != NULL) {
    num_residues = StringLen (residues);
  }
  has_trailing_stop = (Boolean) (num_residues > 0 && residues[num_residues - 1] == '*');
  residues = MemFree (residues);
  if (!has_trailing_stop) {
    return;
  }

  /* drop the last residue from the stored sequence */
  BSSeek ((ByteStorePtr) product_bsp->seq_data, -1, SEEK_END);
  BSDelete ((ByteStorePtr) product_bsp->seq_data, 1);
  product_bsp->length -= 1;

  /* clip protein features that still extend to the removed position */
  last_pos = product_bsp->length - 1;
  feat = SeqMgrGetNextFeature (product_bsp, NULL, 0, 0, &context);
  while (feat != NULL) {
    if (feat->location != NULL && feat->location->choice == SEQLOC_INT) {
      interval = (SeqIntPtr) feat->location->data.ptrvalue;
      if (interval != NULL && interval->to > last_pos) {
        interval->to = last_pos;
      }
    }
    feat = SeqMgrGetNextFeature (product_bsp, feat, 0, 0, &context);
  }

  if (log_info == NULL) {
    return;
  }
  if (log_info->fp != NULL) {
    SeqIdWrite (SeqIdFindBest (product_bsp->id, SEQID_GENBANK), id_txt, PRINTID_FASTA_SHORT, sizeof (id_txt) - 1);
    fprintf (log_info->fp, "Trimmed trailing * from %s\n", id_txt);
  }
  log_info->data_in_log = TRUE;
}
27305 
27306 
TrimStopsFromCompleteCodingRegions(SeqEntryPtr sep,FILE * log_fp)27307 NLM_EXTERN Boolean TrimStopsFromCompleteCodingRegions (SeqEntryPtr sep, FILE *log_fp)
27308 {
27309   LogInfoData lid;
27310   MemSet (&lid, 0, sizeof (LogInfoData));
27311   lid.fp = log_fp;
27312   VisitFeaturesInSep (sep, &lid, TrimStopsFromCompleteCodingRegionsCallback);
27313   return lid.data_in_log;
27314 }
27315 
27316 
DefLineTypeFromAutodefListType(Uint2 list_type)27317 static DefLineType DefLineTypeFromAutodefListType(Uint2 list_type)
27318 {
27319   DefLineType deflinetype = DEFLINE_USE_FEATURES;
27320 
27321   switch (list_type) {
27322     case Autodef_list_type_feature_list:
27323       deflinetype = DEFLINE_USE_FEATURES;
27324       break;
27325     case Autodef_list_type_complete_sequence:
27326       deflinetype = DEFLINE_COMPLETE_SEQUENCE;
27327       break;
27328     case Autodef_list_type_complete_genome:
27329       deflinetype = DEFLINE_COMPLETE_GENOME;
27330       break;
27331     case Autodef_list_type_sequence:
27332       deflinetype = DEFLINE_SEQUENCE;
27333       break;
27334   }
27335   return deflinetype;
27336 }
27337 
27338 
/* Runs automatic definition-line generation on sep as described by action.
 *
 * The source qualifiers listed in action->modifiers are marked as required
 * in the modifier table; the clause list type and misc-feature parse rule
 * from the action are copied into the feature request list; then
 * AutoDefForSeqEntry is invoked.
 *
 * Does nothing when action or sep is NULL, or when the modifier table
 * cannot be allocated.
 */
static void ApplyAutodefActionToSeqEntry (AutodefActionPtr action, SeqEntryPtr sep)
{
  OrganismDescriptionModifiers od;
  ModifierItemLocalPtr modList;
  DeflineFeatureRequestList dfrl;
  ValNodePtr           vnp, modifier_indices = NULL;
  ValNode              field_type, source_qual_choice;
  size_t               i, num_modifiers;
  Int4                 defline_pos;

  /* action is dereferenced below; the original crashed on NULL */
  if (action == NULL || sep == NULL) {
    return;
  }

  /* the table size is loop-invariant, so fetch it once */
  num_modifiers = NumDefLineModifiers ();
  modList = (ModifierItemLocalPtr) MemNew (num_modifiers * sizeof (ModifierItemLocalData));
  if (modList == NULL) {
    return;
  }

  InitOrganismDescriptionModifiers (&od, NULL);
  od.use_modifiers = TRUE;

  for (i = 0; i < num_modifiers; i++) {
    modList[i].any_present = FALSE;
    modList[i].all_present = FALSE;
    modList[i].is_unique = FALSE;
    modList[i].first_value_seen = NULL;
    modList[i].values_seen = NULL;
    modList[i].all_unique = FALSE;
    modList[i].status = NULL;
    modList[i].required = FALSE;
  }
  SetRequiredModifiers (modList);

  /* add modifiers specified in action: build a temporary
   * FieldType -> SourceQualChoice chain on the stack so that
   * GetDeflinePosForFieldType can translate each qualifier into a table index
   */
  MemSet (&source_qual_choice, 0, sizeof (ValNode));
  MemSet (&field_type, 0, sizeof (ValNode));
  source_qual_choice.next = NULL;
  source_qual_choice.choice = SourceQualChoice_textqual;
  field_type.next = NULL;
  field_type.choice = FieldType_source_qual;
  field_type.data.ptrvalue = &source_qual_choice;

  for (vnp = action->modifiers; vnp != NULL; vnp = vnp->next) {
    source_qual_choice.data.intvalue = vnp->data.intvalue;
    defline_pos = GetDeflinePosForFieldType (&field_type);
    /* ignore qualifiers with no table entry, and never index past the table */
    if (defline_pos > -1 && (size_t) defline_pos < num_modifiers) {
      modList[defline_pos].required = TRUE;
      modList[defline_pos].any_present = TRUE;
      ValNodeAddInt (&modifier_indices, 0, defline_pos);
    }
  }

  InitFeatureRequests (&dfrl);
  dfrl.feature_list_type = DefLineTypeFromAutodefListType (action->clause_list_type);
  if (action->misc_feat_parse_rule > 0) {
    dfrl.keep_items[RemovableNoncodingProductFeat] = TRUE;
    dfrl.misc_feat_parse_rule = action->misc_feat_parse_rule;
  }

  AutoDefForSeqEntry (sep, SeqMgrGetEntityIDForSeqEntry (sep), &od, modList, modifier_indices, &dfrl,
                      DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE);

  modList = MemFree (modList);
  modifier_indices = ValNodeFree (modifier_indices);
}
27397 
27398 
IsFixPubCapsActionEmpty(FixPubCapsActionPtr action)27399 NLM_EXTERN Boolean IsFixPubCapsActionEmpty (FixPubCapsActionPtr action)
27400 {
27401   if (action == NULL) {
27402     return TRUE;
27403   }
27404   if (action->affiliation || action->authors || action->title || action->affil_country) {
27405     return FALSE;
27406   } else {
27407     return TRUE;
27408   }
27409 }
27410 
27411 
/* Working state shared by the fix-publication-capitalization helpers. */
typedef struct fixpubcaps {
  FixPubCapsActionPtr action;         /* action being applied; supplies the fix flags and the constraint */
  ValNodePtr          orgnames;       /* filled via GetOrgNamesInRecordCallback; passed to FixCapitalizationInTitle */
  Int4                num_pub_fields; /* changed fields counted when IsPubASub is FALSE for the pub */
  Int4                num_sub_fields; /* changed fields counted when IsPubASub is TRUE for the pub */
  ValNodePtr          object_list;    /* pub descriptors (OBJ_SEQDESC) and pub features (OBJ_SEQFEAT) that matched the constraint */
} FixPubCapsData, PNTR FixPubCapsPtr;
27419 
27420 
IsPubASub(ValNodePtr pub)27421 static Boolean IsPubASub (ValNodePtr pub)
27422 {
27423   if (pub == NULL) {
27424     return FALSE;
27425   } else if (pub->choice == PUB_Sub) {
27426     return TRUE;
27427   } else if (pub->choice == PUB_Equiv) {
27428     return IsPubASub(pub->data.ptrvalue);
27429   } else {
27430     return FALSE;
27431   }
27432 }
27433 
27434 
ApplyFixPubCapsCallback(PubdescPtr pdp,Pointer data)27435 static void ApplyFixPubCapsCallback (PubdescPtr pdp, Pointer data)
27436 {
27437   FixPubCapsPtr f;
27438   CharPtr       orig, tmp;
27439   ValNodePtr    pub;
27440   AuthListPtr   alp = NULL;
27441   ValNodePtr      names;
27442   AuthorPtr     ap, ap_orig;
27443   AffilPtr      affil_orig;
27444 
27445   f = (FixPubCapsPtr)data;
27446   if (f == NULL || f->action == NULL) {
27447     return;
27448   }
27449 
27450   if (f->action->title) {
27451     for (pub = pdp->pub; pub != NULL; pub = pub->next) {
27452       orig = GetPubFieldFromPub (pub, Publication_field_title, NULL);
27453       if (orig != NULL) {
27454         tmp = StringSave (orig);
27455         if (!f->action->punct_only) {
27456           FixCapitalizationInTitle (&tmp, TRUE, f->orgnames);
27457         }
27458         if (StringCmp (orig, tmp) != 0) {
27459           SetPubFieldOnPub (pub, Publication_field_title, NULL, tmp, ExistingTextOption_replace_old);
27460           if (IsPubASub(pub)) {
27461             f->num_sub_fields++;
27462           } else {
27463             f->num_pub_fields++;
27464           }
27465         }
27466         tmp = MemFree (tmp);
27467         orig = MemFree (orig);
27468       }
27469     }
27470   }
27471 
27472   if (f->action->authors && !f->action->punct_only) {
27473     alp = GetAuthListPtr (pdp, NULL);
27474     if (alp != NULL && alp->choice == 1) {
27475       for (names = alp->names; names != NULL; names = names->next) {
27476         ap = names->data.ptrvalue;
27477         ap_orig = AsnIoMemCopy (ap, (AsnReadFunc) AuthorAsnRead, (AsnWriteFunc) AuthorAsnWrite);
27478         FixCapitalizationInAuthor (ap);
27479         if (!AsnIoMemComp (ap, ap_orig, (AsnWriteFunc) AuthorAsnWrite)) {
27480           if (IsPubASub(pdp->pub)) {
27481             f->num_sub_fields++;
27482           } else {
27483             f->num_pub_fields++;
27484           }
27485         }
27486         ap_orig = AuthorFree (ap_orig);
27487       }
27488     }
27489   }
27490 
27491   if (f->action->affiliation) {
27492     if (alp == NULL) {
27493       alp = GetAuthListPtr (pdp, NULL);
27494     }
27495     if (alp != NULL && alp->affil != NULL) {
27496       affil_orig = AsnIoMemCopy (alp->affil, (AsnReadFunc) AffilAsnRead, (AsnWriteFunc) AffilAsnWrite);
27497       FixCapsInPubAffilEx (alp->affil, f->action->punct_only);
27498       if (!AsnIoMemComp (alp->affil, affil_orig, (AsnWriteFunc) AffilAsnWrite)) {
27499         if (IsPubASub(pdp->pub)) {
27500           f->num_sub_fields++;
27501         } else {
27502           f->num_pub_fields++;
27503         }
27504       }
27505       affil_orig = AffilFree (affil_orig);
27506     }
27507   } else if (f->action->affil_country) {
27508     if (alp == NULL) {
27509       alp = GetAuthListPtr (pdp, NULL);
27510     }
27511     if (alp != NULL && alp->affil != NULL && !StringHasNoText (alp->affil->country)) {
27512       orig = StringSave (alp->affil->country);
27513       FixCapitalizationInCountryStringEx (&(alp->affil->country), f->action->punct_only);
27514       if (StringCmp (orig, alp->affil->country) != 0) {
27515         if (IsPubASub(pdp->pub)) {
27516           f->num_sub_fields++;
27517         } else {
27518           f->num_pub_fields++;
27519         }
27520       }
27521       if (StringCmp (alp->affil->country, "USA") == 0 && !StringHasNoText (alp->affil->sub) && !f->action->punct_only) {
27522         orig = StringSave (alp->affil->sub);
27523         FixStateAbbreviationsInAffil (alp->affil, NULL);
27524         if (StringCmp (orig, alp->affil->sub) != 0) {
27525           if (IsPubASub(pdp->pub)) {
27526             f->num_sub_fields++;
27527           } else {
27528             f->num_pub_fields++;
27529           }
27530         }
27531         orig = MemFree (orig);
27532       }
27533       orig = MemFree (orig);
27534     }
27535   }
27536 }
27537 
27538 
/* Feature visitor: appends publication features that satisfy the action's
 * constraint to the FixPubCapsData object_list (choice OBJ_SEQFEAT).
 * data points to a FixPubCapsData whose action has been set.
 */
static void CollectPubObjectsFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  FixPubCapsPtr collector = (FixPubCapsPtr) data;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB || collector == NULL) {
    return;
  }

  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collector->action->constraint)) {
    return;
  }
  ValNodeAddPointer (&(collector->object_list), OBJ_SEQFEAT, sfp);
}
27551 
27552 
/* Descriptor visitor: appends publication descriptors that satisfy the
 * action's constraint to the FixPubCapsData object_list (choice OBJ_SEQDESC).
 * data points to a FixPubCapsData whose action has been set.
 */
static void CollectPubObjectsDescCallback (SeqDescPtr sdp, Pointer data)
{
  FixPubCapsPtr collector = (FixPubCapsPtr) data;

  if (sdp == NULL || sdp->choice != Seq_descr_pub || collector == NULL) {
    return;
  }

  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, collector->action->constraint)) {
    return;
  }
  ValNodeAddPointer (&(collector->object_list), OBJ_SEQDESC, sdp);
}
27565 
27566 
ApplyFixPubCapsToSeqEntry(FixPubCapsActionPtr action,SeqEntryPtr sep,FILE * log_fp)27567 static Boolean ApplyFixPubCapsToSeqEntry (FixPubCapsActionPtr action, SeqEntryPtr sep, FILE *log_fp)
27568 {
27569   FixPubCapsData         f;
27570   ValNodePtr             vnp;
27571   PubdescPtr             pdp;
27572   SeqFeatPtr             sfp;
27573   SeqDescPtr             sdp;
27574   CharPtr                summ;
27575   Boolean                rval = FALSE;
27576   AuthListPtr            alp;
27577   ValNodePtr             names;
27578   AuthorPtr              ap;
27579   SeqSubmitPtr           ssp;
27580   SubmitBlockPtr         sbp;
27581   CitSubPtr              csp;
27582 
27583   if (action == NULL || sep == NULL) return FALSE;
27584 
27585   MemSet (&f, 0, sizeof (FixPubCapsData));
27586   f.action = action;
27587 
27588   /* collect pub objects that match constraint */
27589   VisitDescriptorsInSep (sep, &f, CollectPubObjectsDescCallback);
27590   VisitFeaturesInSep (sep, &f, CollectPubObjectsFeatCallback);
27591 
27592   if (f.object_list == NULL) {
27593     /* nothing to change */
27594     return FALSE;
27595   }
27596 
27597   if (action->title) {
27598     /* get org names to use in fixes */
27599     VisitBioSourcesInSep (sep, &f.orgnames, GetOrgNamesInRecordCallback);
27600   }
27601 
27602   for (vnp = f.object_list; vnp != NULL; vnp = vnp->next) {
27603     pdp = NULL;
27604     if (vnp->choice == OBJ_SEQFEAT) {
27605       sfp = vnp->data.ptrvalue;
27606       pdp = sfp->data.value.ptrvalue;
27607     } else if (vnp->choice == OBJ_SEQDESC) {
27608       sdp = vnp->data.ptrvalue;
27609       pdp = sdp->data.ptrvalue;
27610     }
27611     ApplyFixPubCapsCallback (pdp, &f);
27612   }
27613 
27614   ssp = FindSeqSubmitForSeqEntry (sep);
27615   if (ssp != NULL) {
27616     sbp = ssp->sub;
27617     if (sbp != NULL) {
27618       csp = sbp->cit;
27619       if (csp != NULL) {
27620         alp = csp->authors;
27621         if (alp != NULL && alp->choice == 1) {
27622           for (names = alp->names; names != NULL; names = names->next) {
27623             ap = names->data.ptrvalue;
27624             if (f.action->authors && !f.action->punct_only) {
27625               FixCapitalizationInAuthor (ap);
27626               f.num_sub_fields++;
27627             }
27628           }
27629         }
27630       }
27631     }
27632   }
27633 
27634   f.orgnames = ValNodeFree (f.orgnames);
27635 
27636   if (f.num_sub_fields > 0 || f.num_pub_fields > 0) {
27637     rval = TRUE;
27638     if (log_fp != NULL) {
27639       summ = SummarizeFixPubCapsAction (action);
27640       if (f.num_sub_fields > 0) {
27641         fprintf (log_fp, "Fixed capitalization in %d publication fields in submitter blocks during %s\n", f.num_sub_fields, summ);
27642       }
27643       if (f.num_pub_fields > 0) {
27644         fprintf (log_fp, "Fixed capitalization in %d publication fields in publication blocks during %s\n", f.num_pub_fields, summ);
27645       }
27646       summ = MemFree (summ);
27647     }
27648   }
27649 
27650   return rval;
27651 }
27652 
27653 
/* Fixes the capitalization of an all-caps last name (names[0]) of a
 * std-name author (name choice 2).
 *
 * When the fix changes the name, a newly allocated "<old> to <new>" string
 * is appended to block and the author's names[0] is replaced by the fixed
 * value.  Authors that are NULL, not std-name, have no last name, or whose
 * last name is not all caps are left untouched.
 */
static void FixAuthorLastNamesAuthor (AuthorPtr author, ValNodeBlockPtr block)
{
  NameStdPtr std_name;
  CharPtr    fixed;
  CharPtr    report;
  CharPtr    fmt = "%s to %s";

  if (author == NULL || author->name == NULL || author->name->choice != 2) {
    return;
  }

  std_name = author->name->data;
  if (std_name == NULL || std_name->names[0] == NULL) {
    return;
  }
  if (!IsAllCaps(std_name->names[0])) {
    return;
  }

  fixed = StringSave (std_name->names[0]);
  FixCapitalizationInElement (&fixed, FALSE, FALSE, TRUE);
  if (StringCmp (std_name->names[0], fixed) == 0) {
    /* the fix was a no-op: discard the copy and report nothing */
    fixed = MemFree (fixed);
    return;
  }

  /* the two "%s" in fmt leave room for the terminating NUL */
  report = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (std_name->names[0]) + StringLen (fixed)));
  sprintf (report, fmt, std_name->names[0], fixed);
  ValNodeAddPointerToEnd (block, 0, report);

  /* the author takes ownership of the fixed string */
  std_name->names[0] = MemFree (std_name->names[0]);
  std_name->names[0] = fixed;
}
27683 
27684 
/* Fixes the capitalization of an author whose whole name string (as produced
 * by GetAuthorStringEx) is all caps, and appends a newly allocated
 * "<old> to <new>" string to block.
 *
 * The before/after name strings are released here; the original leaked both
 * on every call.
 * NOTE(review): this relies on GetAuthorStringEx returning a caller-owned
 * copy, as its other uses in this file suggest - confirm against its
 * definition.
 */
static void FixAuthorNameAuthor (AuthorPtr author, ValNodeBlockPtr block)
{
  CharPtr    oldval, newval;
  CharPtr    str;
  CharPtr    fmt = "%s to %s";

  oldval = GetAuthorStringEx (author, FALSE);
  if (IsAllCaps (oldval)) {
    FixCapitalizationInAuthor (author);
    newval = GetAuthorStringEx (author, FALSE);
    if (oldval != NULL && newval != NULL) {
      /* the two "%s" in fmt leave room for the terminating NUL */
      str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (oldval) + StringLen (newval)));
      if (str != NULL) {
        sprintf (str, fmt, oldval, newval);
        ValNodeAddPointerToEnd (block, 0, str);
      }
    }
    newval = MemFree (newval);
  }
  oldval = MemFree (oldval);
}
27700 
27701 
/* State passed to the author-name capitalization visitors. */
typedef struct fixsingleauthor {
  Boolean last_name_only; /* TRUE: FixAuthorLastNamesAuthor is used; FALSE: FixAuthorNameAuthor */
  ValNodeBlock block;     /* receives one "<old> to <new>" string per reported change */
} FixSingleAuthorData, PNTR FixSingleAuthorPtr;
27706 
27707 
/* Fixes author-name capitalization for every author of one pub.
 * Only author lists with choice 1 are processed.  f selects last-name-only
 * versus whole-name fixing and collects the change reports; nothing is done
 * when f is NULL.
 */
static void FixAuthorNamesPub (PubPtr pub, FixSingleAuthorPtr f)
{
  AuthListPtr author_list;
  ValNodePtr  entry;

  if (f == NULL) {
    return;
  }

  author_list = GetAuthorListForPub (pub);
  if (author_list == NULL || author_list->choice != 1) {
    return;
  }

  entry = author_list->names;
  while (entry != NULL) {
    if (f->last_name_only) {
      FixAuthorLastNamesAuthor(entry->data.ptrvalue, &(f->block));
    } else {
      FixAuthorNameAuthor (entry->data.ptrvalue, &(f->block));
    }
    entry = entry->next;
  }
}
27727 
27728 
/* Calls FixAuthorNamesPub on every pub in the Pubdesc's pub chain.
 * A NULL pdp is ignored.
 */
static void FixAuthorNamesPubdesc (PubdescPtr pdp, FixSingleAuthorPtr f)
{
  ValNodePtr one_pub;

  if (pdp == NULL) {
    return;
  }

  one_pub = pdp->pub;
  while (one_pub != NULL) {
    FixAuthorNamesPub (one_pub, f);
    one_pub = one_pub->next;
  }
}
27739 
27740 
/* Descriptor visitor: forwards publication descriptors to
 * FixAuthorNamesPubdesc.  data is passed through as the FixSingleAuthorPtr.
 */
static void FixAuthorNamesCapsDescCallback (SeqDescPtr sdp, Pointer data)
{
  if (sdp == NULL || sdp->choice != Seq_descr_pub) {
    return;
  }
  FixAuthorNamesPubdesc((PubdescPtr) sdp->data.ptrvalue, (FixSingleAuthorPtr) data);
}
27747 
27748 
/* Feature visitor: forwards publication features to FixAuthorNamesPubdesc.
 * data is passed through as the FixSingleAuthorPtr.
 */
static void FixAuthorNamesCapsFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB) {
    return;
  }
  FixAuthorNamesPubdesc((PubdescPtr) sfp->data.value.ptrvalue, (FixSingleAuthorPtr) data);
}
27755 
27756 
FixAuthorNamesCaps(FixAuthorCapsPtr action,SeqEntryPtr sep,FILE * log_fp)27757 static Boolean FixAuthorNamesCaps (FixAuthorCapsPtr action, SeqEntryPtr sep, FILE *log_fp)
27758 {
27759   FixSingleAuthorData fix;
27760   ValNodePtr   vnp;
27761   CharPtr      fmt = "Fix Author Last Names Capitalization: Changed %s %d time%s\n";
27762   Int4         count;
27763   CharPtr      curr;
27764 
27765   if (action == NULL) {
27766     return FALSE;
27767   }
27768   fix.last_name_only = action->last_name_only;
27769   InitValNodeBlock (&(fix.block), NULL);
27770 
27771   /* collect pub objects that match constraint */
27772   VisitDescriptorsInSep (sep, &fix, FixAuthorNamesCapsDescCallback);
27773   VisitFeaturesInSep (sep, &fix, FixAuthorNamesCapsFeatCallback);
27774 
27775   if (fix.block.head == NULL) {
27776     /* nothing changed */
27777     return FALSE;
27778   } else {
27779     /* report changes */
27780     if (log_fp != NULL) {
27781       fix.block.head = ValNodeSort (fix.block.head, SortVnpByString);
27782       curr = fix.block.head->data.ptrvalue;
27783       count = 1;
27784       for (vnp = fix.block.head->next; vnp != NULL; vnp = vnp->next) {
27785         if (StringCmp (curr, vnp->data.ptrvalue) == 0) {
27786           count++;
27787         } else {
27788           fprintf (log_fp, fmt, curr, count, count > 1 ? "s" : "");
27789           curr = vnp->data.ptrvalue;
27790           count = 1;
27791         }
27792       }
27793       fprintf (log_fp, fmt, curr, count, count > 1 ? "s" : "");
27794     }
27795     fix.block.head = ValNodeFreeData (fix.block.head);
27796     return TRUE;
27797   }
27798 }
27799 
27800 
IsFieldSortable(FieldTypePtr field)27801 NLM_EXTERN Boolean IsFieldSortable (FieldTypePtr field)
27802 {
27803   Boolean rval = FALSE;
27804   FeatureFieldPtr ffield;
27805 
27806   if (field == NULL) {
27807     return FALSE;
27808   }
27809   if (field->choice == FieldType_feature_field) {
27810     ffield = field->data.ptrvalue;
27811     if (ffield != NULL) {
27812       if ((ffield->type == Macro_feature_type_cds || ffield->type == Macro_feature_type_prot)
27813           && ffield->field->choice == FeatQualChoice_legal_qual
27814           && ffield->field->data.intvalue == Feat_qual_legal_product) {
27815         rval = TRUE;
27816       }
27817     }
27818   } else if (field->choice == FieldType_cds_gene_prot) {
27819     if (field->data.intvalue == CDSGeneProt_field_prot_name) {
27820       rval = TRUE;
27821     }
27822   }
27823   return rval;
27824 }
27825 
27826 
SortFieldsInSeqEntry(SortFieldsActionPtr action,SeqEntryPtr sep)27827 static Int4 SortFieldsInSeqEntry (SortFieldsActionPtr action, SeqEntryPtr sep)
27828 {
27829   ValNodePtr          object_list = NULL, vnp;
27830   Int4                num = 0;
27831 
27832   if (action == NULL || action->field == NULL || !IsFieldSortable(action->field) || sep == NULL) {
27833     return 0;
27834   }
27835 
27836   object_list = GetObjectListForFieldType (action->field->choice, sep);
27837   for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
27838     if (DoesObjectMatchConstraintChoiceSet (vnp->choice, vnp->data.ptrvalue, action->constraint)
27839         && IsObjectAppropriateForFieldValue(vnp->choice, vnp->data.ptrvalue, action->field)) {
27840       if (SortFieldsForObject (vnp->choice, vnp->data.ptrvalue, action->field, action->order)) {
27841         num++;
27842       }
27843     }
27844   }
27845 
27846 
27847   return num;
27848 }
27849 
27850 
/* Returns TRUE when str1 and str2 compare equal, using StringCmp when
 * case_sensitive is set and StringICmp otherwise.
 */
static Boolean DoStringsMatch (CharPtr str1, CharPtr str2, Boolean case_sensitive)
{
  if (case_sensitive) {
    return (StringCmp (str1, str2) == 0) ? TRUE : FALSE;
  }
  return (StringICmp (str1, str2) == 0) ? TRUE : FALSE;
}
27864 
27865 
/* Compares two GBQual chains position by position.
 * Returns TRUE only when both chains have the same length and every pair of
 * qualifiers agrees in both name (qual) and value (val) according to
 * DoStringsMatch.
 */
static Boolean DoGBQualListsMatch (GBQualPtr gbq1, GBQualPtr gbq2, Boolean case_sensitive)
{
  GBQualPtr left = gbq1;
  GBQualPtr right = gbq2;

  while (left != NULL && right != NULL) {
    if (!DoStringsMatch (left->qual, right->qual, case_sensitive)
        || !DoStringsMatch (left->val, right->val, case_sensitive)) {
      return FALSE;
    }
    left = left->next;
    right = right->next;
  }

  /* a leftover element on either side means the lists differ in length */
  if (left != NULL || right != NULL) {
    return FALSE;
  }
  return TRUE;
}
27885 
27886 
CheckBioseqForPartial(BioseqPtr bsp,BoolPtr partial5,BoolPtr partial3)27887 static Boolean CheckBioseqForPartial (BioseqPtr bsp, BoolPtr partial5, BoolPtr partial3)
27888 {
27889   SeqMgrDescContext context;
27890   SeqDescrPtr       sdp;
27891   MolInfoPtr        mip;
27892   Boolean           rval = FALSE;
27893 
27894   if (bsp == NULL) {
27895     return FALSE;
27896   }
27897   if (partial5 != NULL) {
27898     *partial5 = FALSE;
27899   }
27900   if (partial3 != NULL) {
27901     *partial3 = FALSE;
27902   }
27903   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
27904   if (sdp != NULL && (mip = (MolInfoPtr) sdp->data.ptrvalue) != NULL) {
27905     /* partial 5 */
27906     if (mip->completeness == 3 || mip->completeness == 5) {
27907       if (partial5 != NULL) {
27908         *partial5 = TRUE;
27909       }
27910       rval = TRUE;
27911     }
27912     /* partial 3 */
27913     if (mip->completeness == 4 || mip->completeness == 5) {
27914       if (partial3 != NULL) {
27915         *partial3 = TRUE;
27916       }
27917       rval = TRUE;
27918     }
27919     if (mip->completeness == 2) {
27920       rval = TRUE;
27921     }
27922   }
27923   return rval;
27924 }
27925 
27926 
/* Decides whether two product locations refer to equivalent products.
 *
 * Two NULL locations match; one NULL location never matches; locations that
 * SeqLocCompare reports as equal match.  Otherwise both product Bioseqs must
 * be found, have equal length, agree in 5'/3' partialness (unless
 * ignore_partial), have identical sequence, and carry matching features as
 * judged by DoFeaturesMatch.
 *
 * With ignore_partial set, the first residue is skipped in the sequence
 * comparison when either product is 5' partial.
 */
static Boolean ProductsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean case_sensitive, Boolean ignore_partial)
{
  BioseqPtr bsp1, bsp2;
  /* Positions are compared against Int4 Bioseq lengths; the original Int2
   * counters overflowed on products longer than 32767 residues.
   */
  Int4      ctr, pos1, pos2;
  Char      buf1[51];
  Char      buf2[51];
  Int4      len = 50;   /* residues compared per chunk; buffers hold len + 1 */
  SeqFeatPtr sfp1, sfp2;
  SeqMgrFeatContext fcontext1, fcontext2;
  Boolean           partial5_1, partial5_2, partial3_1, partial3_2;

  if (slp1 == NULL && slp2 == NULL) {
    return TRUE;
  } else if (slp1 == NULL || slp2 == NULL) {
    return FALSE;
  } else if (SeqLocCompare (slp1, slp2) == SLC_A_EQ_B) {
    return TRUE;
  }

  bsp1 = BioseqFindFromSeqLoc (slp1);
  bsp2 = BioseqFindFromSeqLoc (slp2);
  if (bsp1 == NULL || bsp2 == NULL) {
    /* can't compare, assume they don't match */
    return FALSE;
  }
  if (bsp1->length != bsp2->length) {
    return FALSE;
  }

  CheckBioseqForPartial (bsp1, &partial5_1, &partial3_1);
  CheckBioseqForPartial (bsp2, &partial5_2, &partial3_2);
  if (!ignore_partial
      && ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)
      || (partial3_1 && !partial3_2) || (!partial3_1 && partial3_2))) {
    return FALSE;
  }

  /* check that translation sequences match */
  pos1 = 0;
  pos2 = 0;
  if (ignore_partial) {
    if (partial5_1 || partial5_2) {
      pos1++;
      pos2++;
    }
  }
  while (pos1 < bsp1->length && pos2 < bsp2->length) {
    ctr = SeqPortStreamInt (bsp1, pos1, MIN(pos1 + len - 1, bsp1->length - 1), Seq_strand_plus,
                        STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                        (Pointer) buf1, NULL);
    ctr = SeqPortStreamInt (bsp2, pos2, MIN(pos2 + len - 1, bsp2->length - 1), Seq_strand_plus,
                        STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                        (Pointer) buf2, NULL);
    if (StringNCmp (buf1, buf2, ctr) != 0) {
      return FALSE;
    }
    pos1 += len;
    pos2 += len;
  }

  /* now check that protein features match */
  /* NOTE(review): the first lookup accepts any feature type while the
   * following ones request SEQFEAT_PROT only - kept as in the original,
   * confirm whether the first call should also filter on SEQFEAT_PROT.
   */
  sfp1 = SeqMgrGetNextFeature (bsp1, NULL, 0, 0, &fcontext1);
  sfp2 = SeqMgrGetNextFeature (bsp2, NULL, 0, 0, &fcontext2);
  while (sfp1 != NULL && sfp2 != NULL) {
    if (!DoFeaturesMatch (sfp1, sfp2, TRUE, case_sensitive, ignore_partial)) {
      return FALSE;
    }
    sfp1 = SeqMgrGetNextFeature (bsp1, sfp1, SEQFEAT_PROT, 0, &fcontext1);
    sfp2 = SeqMgrGetNextFeature (bsp2, sfp2, SEQFEAT_PROT, 0, &fcontext2);
  }

  /* a leftover feature on either product means the feature sets differ */
  if (sfp1 != NULL || sfp2 != NULL) {
    return FALSE;
  }
  return TRUE;
}
28001 
28002 
DoLocationPartialsMatch(SeqLocPtr slp1,SeqLocPtr slp2)28003 static Boolean DoLocationPartialsMatch (SeqLocPtr slp1, SeqLocPtr slp2)
28004 {
28005   Boolean partial5_1, partial3_1, partial1;
28006   Boolean partial5_2, partial3_2, partial2;
28007 
28008   partial1 = CheckSeqLocForPartial (slp1, &partial5_1, &partial3_1);
28009   partial2 = CheckSeqLocForPartial (slp2, &partial5_2, &partial3_2);
28010   if ((partial1 && !partial2) || (!partial1 && partial2)) {
28011     return FALSE;
28012   }
28013   if ((partial5_1 && !partial5_2) || (!partial5_1 && partial5_2)) {
28014     return FALSE;
28015   }
28016   if ((partial3_1 && !partial3_2) || (!partial3_1 && partial3_2)) {
28017     return FALSE;
28018   }
28019   return TRUE;
28020 }
28021 
28022 
/* Decides whether two feature locations match.
 *
 * Two NULL locations match; one NULL location never matches.  Unless
 * ignore_partial is set, the partial indicators of the two locations must
 * agree.
 *
 * allow_different_sequences == FALSE: the locations must be equal according
 *   to SeqLocCompare.
 * allow_different_sequences == TRUE: the locations are walked interval by
 *   interval with SeqLocFindNext; corresponding intervals must have the same
 *   start and stop (and partial indicators, unless ignore_partial), and both
 *   locations must contain the same number of intervals.
 */
static Boolean DoLocationsMatch (SeqLocPtr slp1, SeqLocPtr slp2, Boolean allow_different_sequences, Boolean ignore_partial)
{
  SeqLocPtr slp_tmp1, slp_tmp2;

  if (slp1 == NULL && slp2 == NULL) {
    return TRUE;
  } else if (slp1 == NULL || slp2 == NULL) {
    return FALSE;
  }

  if (!ignore_partial && !DoLocationPartialsMatch (slp1, slp2)) {
    return FALSE;
  }
  if (allow_different_sequences) {
    slp_tmp1 = SeqLocFindNext (slp1, NULL);
    slp_tmp2 = SeqLocFindNext (slp2, NULL);
    while (slp_tmp1 != NULL && slp_tmp2 != NULL) {
      if (SeqLocStart (slp_tmp1) != SeqLocStart (slp_tmp2)
          || SeqLocStop (slp_tmp1) != SeqLocStop (slp_tmp2)
          || (!ignore_partial && !DoLocationPartialsMatch (slp_tmp1, slp_tmp2))) {
        return FALSE;
      }
      slp_tmp1 = SeqLocFindNext (slp1, slp_tmp1);
      slp_tmp2 = SeqLocFindNext (slp2, slp_tmp2);
    }
    /* One location ran out of intervals before the other.  The original
     * stopped here and reported a match whenever the shorter location was a
     * prefix of the longer one.
     */
    if (slp_tmp1 != NULL || slp_tmp2 != NULL) {
      return FALSE;
    }
  } else if (SeqLocCompare (slp1, slp2) != SLC_A_EQ_B) {
    return FALSE;
  }
  return TRUE;
}
28051 
28052 
DoCdRegionsMatch(CdRegionPtr crp1,CdRegionPtr crp2)28053 static Boolean DoCdRegionsMatch (CdRegionPtr crp1, CdRegionPtr crp2)
28054 {
28055   if (crp1 == NULL && crp2 == NULL) {
28056     return TRUE;
28057   } else if (crp1 == NULL || crp2 == NULL) {
28058     return FALSE;
28059   } else if ((crp1->orf && !crp2->orf) || (!crp1->orf && crp2->orf)){
28060     return FALSE;
28061   } else if ((crp1->conflict && !crp2->conflict) || (!crp1->conflict && crp2->conflict)){
28062     return FALSE;
28063   } else if (crp1->gaps != crp2->gaps) {
28064     return FALSE;
28065   } else if (crp1->mismatch != crp2->mismatch) {
28066     return FALSE;
28067   } else if (crp1->stops != crp2->stops) {
28068     return FALSE;
28069   } else if ((crp1->genetic_code == NULL && crp2->genetic_code != NULL)
28070              || (crp1->genetic_code != NULL && crp2->genetic_code == NULL)
28071              || (crp1->genetic_code != NULL && crp2->genetic_code != NULL
28072                  && !AsnIoMemComp (crp1->genetic_code, crp2->genetic_code, (AsnWriteFunc) GeneticCodeAsnWrite))) {
28073     return FALSE;
28074   } else if ((crp1->code_break == NULL && crp2->code_break != NULL)
28075              || (crp1->code_break != NULL && crp2->code_break == NULL)
28076              || (crp1->code_break != NULL && crp2->code_break != NULL
28077                  && !AsnIoMemComp (crp1->code_break, crp2->code_break, (AsnWriteFunc) CodeBreakAsnWrite))) {
28078     return FALSE;
28079   } else if (crp1->frame != crp2->frame) {
28080     if ((crp1->frame == 0 || crp1->frame == 1) && (crp2->frame == 0 || crp2->frame == 1)) {
28081       /* both effectively frame 1, ignore this difference */
28082     } else {
28083       return FALSE;
28084     }
28085   }
28086   return TRUE;
28087 }
28088 
28089 
DoesSeqFeatDataMatch(ChoicePtr d1,ChoicePtr d2)28090 static Boolean DoesSeqFeatDataMatch (ChoicePtr d1, ChoicePtr d2)
28091 {
28092   if (d1 == NULL && d2 == NULL) {
28093     return TRUE;
28094   } else if (d1 == NULL || d2 == NULL) {
28095     return FALSE;
28096   } else if (d1->choice != d2->choice) {
28097     return FALSE;
28098   } else if (d1->choice == SEQFEAT_CDREGION) {
28099     return DoCdRegionsMatch(d1->value.ptrvalue, d2->value.ptrvalue);
28100   } else {
28101     return AsnIoMemComp(d1, d2, (AsnWriteFunc) SeqFeatDataAsnWrite);
28102   }
28103 }
28104 
28105 
DoFeaturesMatch(SeqFeatPtr sfp1,SeqFeatPtr sfp2,Boolean allow_different_sequences,Boolean case_sensitive,Boolean ignore_partial)28106 NLM_EXTERN Boolean DoFeaturesMatch (SeqFeatPtr sfp1, SeqFeatPtr sfp2, Boolean allow_different_sequences, Boolean case_sensitive, Boolean ignore_partial)
28107 {
28108   if (sfp1 == NULL && sfp2 == NULL) {
28109     return TRUE;
28110   } else if (sfp1 == NULL || sfp2 == NULL) {
28111     return FALSE;
28112   } if (sfp1->data.choice != sfp2->data.choice) {
28113     return FALSE;
28114   } else if (sfp1->idx.subtype != sfp2->idx.subtype) {
28115     return FALSE;
28116   } else if (!ignore_partial && ((sfp1->partial && !sfp2->partial) || (!sfp1->partial && sfp2->partial))) {
28117     return FALSE;
28118   } else if ((sfp1->pseudo && !sfp2->pseudo) || (!sfp1->pseudo && sfp2->pseudo)) {
28119     return FALSE;
28120   } else if ((sfp1->excpt && !sfp2->excpt) || (!sfp1->excpt && sfp2->excpt)) {
28121     return FALSE;
28122   } else if (!DoLocationsMatch (sfp1->location, sfp2->location, allow_different_sequences, ignore_partial)) {
28123     return FALSE;
28124   } else if (!DoStringsMatch (sfp1->comment, sfp2->comment, case_sensitive)) {
28125     return FALSE;
28126   } else if (!DoStringsMatch (sfp1->title, sfp2->title, case_sensitive)) {
28127     return FALSE;
28128   } else if (sfp1->ext != NULL || sfp2->ext != NULL) {
28129     return FALSE;
28130   } else if (sfp1->exts != NULL || sfp2->exts != NULL) {
28131     return FALSE;
28132   } else if (!DoStringsMatch (sfp1->except_text, sfp2->except_text, case_sensitive)) {
28133     return FALSE;
28134   } else if (sfp1->exp_ev != sfp2->exp_ev) {
28135     return FALSE;
28136   } else if (!DoGBQualListsMatch (sfp1->qual, sfp2->qual, case_sensitive)) {
28137     return FALSE;
28138   } else if ((sfp1->cit != NULL || sfp2->cit != NULL) && PubMatch (sfp1->cit, sfp2->cit) != 0) {
28139     return FALSE;
28140   } else if (!DbxrefsMatch (sfp1->dbxref, sfp2->dbxref, case_sensitive)) {
28141     return FALSE;
28142   } else if (!DoesSeqFeatDataMatch(&(sfp1->data), &(sfp2->data))) {
28143     return FALSE;
28144   } else if (!XrefsMatch (sfp1->xref, sfp2->xref)) {
28145     return FALSE;
28146   } else if (!ProductsMatch (sfp1->product, sfp2->product, case_sensitive, ignore_partial)) {
28147     return FALSE;
28148   } else {
28149     return TRUE;
28150   }
28151 }
28152 
28153 
/* State shared with FindDuplicateFeatsCallback while scanning for
 * duplicate features.
 */
typedef struct dupfeats {
  ValNodePtr delete_list;                    /* features selected for removal (choice OBJ_SEQFEAT) */
  RemoveDuplicateFeatureActionPtr action;    /* macro action supplying type, constraint and comparison options */
} DupFeatsData, PNTR DupFeatsPtr;
28158 
28159 
/* Bioseq visitor that collects duplicate features on one sequence.
 *
 * bsp   sequence whose indexed features are walked with SeqMgrGetNextFeature.
 * data  must point to a DupFeatsData; features chosen for removal are
 *       appended to its delete_list.
 *
 * Each feature is compared with the next one returned by the feature index.
 * When the two match, the later feature is listed if it satisfies the
 * action's rd_constraint; otherwise the earlier one is listed if it
 * satisfies the constraint and was not already listed in the previous step.
 *
 * Nothing is done when bsp, data or the action inside data is NULL.
 */
static void FindDuplicateFeatsCallback (BioseqPtr bsp, Pointer data)
{
  DupFeatsPtr       dfp;
  SeqFeatPtr        sfp1, sfp2;
  SeqMgrFeatContext fcontext;
  Uint1             featdef;
  ValNodePtr        vnp_prev = NULL;

  /* the action is dereferenced below, so it must be present as well */
  if (bsp == NULL || (dfp = (DupFeatsPtr) data) == NULL || dfp->action == NULL) {
    return;
  }

  if (dfp->action->type == Macro_feature_type_any) {
    featdef = 0;
  } else {
    featdef = GetFeatdefFromFeatureType (dfp->action->type);
  }
  sfp1 = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
  while (sfp1 != NULL) {
    sfp2 = SeqMgrGetNextFeature (bsp, sfp1, 0, featdef, &fcontext);
    if (sfp1 == sfp2) {
      /* no progress was made; stop rather than loop forever */
      break;
    }
    if (DoFeaturesMatch (sfp1, sfp2, FALSE, dfp->action->case_sensitive, dfp->action->ignore_partials)) {
      if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp2, dfp->action->rd_constraint)) {
        /* remember the node so sfp2 is not listed twice when it becomes sfp1 */
        vnp_prev = ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp2);
      } else if ((vnp_prev == NULL || vnp_prev->data.ptrvalue != sfp1)
                 && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp1, dfp->action->rd_constraint)) {
        ValNodeAddPointer (&(dfp->delete_list), OBJ_SEQFEAT, sfp1);
      }
    }
    sfp1 = sfp2;
  }

}
28195 
28196 
GetDuplicateFeaturesForRemoval(SeqEntryPtr sep,RemoveDuplicateFeatureActionPtr action)28197 NLM_EXTERN ValNodePtr GetDuplicateFeaturesForRemoval (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action)
28198 {
28199   DupFeatsData df;
28200 
28201   MemSet (&df, 0, sizeof (DupFeatsData));
28202   df.action = action;
28203 
28204   VisitBioseqsInSep (sep, &df, FindDuplicateFeatsCallback);
28205   return df.delete_list;
28206 }
28207 
28208 
RemoveDuplicateFeaturesInList(ValNodePtr delete_list,Uint2 entityID,Boolean remove_proteins)28209 NLM_EXTERN void RemoveDuplicateFeaturesInList (ValNodePtr delete_list, Uint2 entityID, Boolean remove_proteins)
28210 {
28211   ValNodePtr vnp;
28212   SeqFeatPtr sfp;
28213   BioseqPtr  protbsp;
28214   SeqEntryPtr sep;
28215 
28216   for (vnp = delete_list; vnp != NULL; vnp = vnp->next) {
28217     sfp = (SeqFeatPtr) vnp->data.ptrvalue;
28218     if (sfp != NULL) {
28219       if (remove_proteins && sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL) {
28220         protbsp = BioseqFindFromSeqLoc (sfp->product);
28221         if (protbsp != NULL) {
28222           protbsp->idx.deleteme = TRUE;
28223         }
28224       }
28225       sfp->idx.deleteme = TRUE;
28226     }
28227   }
28228 
28229   DeleteMarkedObjects (entityID, 0, NULL);
28230   if (remove_proteins) {
28231     sep = GetTopSeqEntryForEntityID (entityID);
28232     RenormalizeNucProtSets (sep, TRUE);
28233   }
28234 
28235 }
28236 
28237 
RemoveDuplicateFeaturesInSeqEntry(SeqEntryPtr sep,RemoveDuplicateFeatureActionPtr action,FILE * log_fp)28238 NLM_EXTERN Boolean RemoveDuplicateFeaturesInSeqEntry (SeqEntryPtr sep, RemoveDuplicateFeatureActionPtr action, FILE *log_fp)
28239 {
28240   ValNodePtr delete_list;
28241   Int4       num;
28242 
28243   delete_list = GetDuplicateFeaturesForRemoval (sep, action);
28244   if (delete_list == NULL) {
28245     return FALSE;
28246   }
28247 
28248   if (log_fp != NULL) {
28249     num = ValNodeLen (delete_list);
28250     fprintf (log_fp, "Removed %d duplicate features\n", num);
28251   }
28252 
28253   RemoveDuplicateFeaturesInList (delete_list, ObjMgrGetEntityIDForChoice(sep), action->remove_proteins);
28254   return TRUE;
28255 }
28256 
28257 
DoesTextContainOnlyTheseWords(CharPtr txt,ValNodePtr word_list)28258 NLM_EXTERN Boolean DoesTextContainOnlyTheseWords (CharPtr txt, ValNodePtr word_list)
28259 {
28260   CharPtr cp;
28261   ValNodePtr vnp;
28262   Boolean    match;
28263   Boolean    at_least_one = FALSE;
28264   Int4       len;
28265 
28266   if (StringHasNoText(txt)) {
28267     return FALSE;
28268   }
28269 
28270   cp = txt;
28271   while (isspace (*cp) || ispunct(*cp)) {
28272     cp++;
28273   }
28274   match = TRUE;
28275   while (*cp != 0 && match) {
28276     match = FALSE;
28277     for (vnp = word_list; vnp != NULL && !match; vnp = vnp->next) {
28278       len = StringLen (vnp->data.ptrvalue);
28279       if (StringNICmp (cp, vnp->data.ptrvalue, len) == 0
28280           && (*(cp + len) == 0 || isspace(*(cp + len)) || ispunct(*(cp + len)))) {
28281         match = TRUE;
28282         cp += len;
28283         at_least_one = TRUE;
28284       }
28285     }
28286     while (isspace (*cp) || ispunct(*cp)) {
28287       cp++;
28288     }
28289   }
28290   return (match && at_least_one);
28291 }
28292 
28293 
WordListFromText(CharPtr txt)28294 static ValNodePtr WordListFromText (CharPtr txt)
28295 {
28296   ValNodePtr list = NULL;
28297   CharPtr    start, end, word;
28298   Int4       len;
28299 
28300   if (StringHasNoText(txt)) {
28301     return NULL;
28302   }
28303 
28304   start = txt;
28305 
28306   while (isspace (*start) || ispunct(*start)) {
28307     start++;
28308   }
28309   while (*start != 0) {
28310     end = start + 1;
28311     len = 1;
28312     while (*end != 0 && !isspace (*end) && !ispunct(*end)) {
28313       end++;
28314       len++;
28315     }
28316     word = (CharPtr) MemNew (sizeof (Char) * (len + 1));
28317     StringNCpy (word, start, len);
28318     word[len] = 0;
28319     ValNodeAddPointer (&list, 0, word);
28320     start = end;
28321     while (isspace (*start) || ispunct(*start)) {
28322       start++;
28323     }
28324   }
28325 
28326   return list;
28327 }
28328 
28329 
/* Extra words and phrases that RemoveLineageNoteFromBioSource adds to the
 * lineage and taxname words when deciding whether a note only repeats
 * lineage information.  The list is NULL-terminated.
 */
static CharPtr s_SpecialLineageWords[] = {
  "Class",
  "Classification",
  "Domain",
  "Family",
  "Genus",
  "Kingdom",
  "Lineage",
  "Note",
  "Order",
  "Organism",
  "Phylum",
  "Species",
  "Superfamily",
  "Tax class/lineage",
  "Taxonomic classification",
  "Taxonomic Classification is",
  "Taxonomy",
  NULL
};
28350 
RemoveLineageNoteFromBioSource(BioSourcePtr biop,FILE * fp)28351 static Boolean RemoveLineageNoteFromBioSource (BioSourcePtr biop, FILE *fp)
28352 {
28353   SubSourcePtr ssp, ssp_prev = NULL, ssp_next;
28354   OrgModPtr mod, mod_prev = NULL, mod_next;
28355   Boolean any_removed = FALSE;
28356   ValNodePtr word_list = NULL;
28357   Int4 i;
28358 
28359   if (!HasTaxonomyID (biop) || biop->org == NULL
28360       || biop->org->orgname == NULL
28361       || StringHasNoText (biop->org->orgname->lineage)) {
28362     return FALSE;
28363   }
28364 
28365   word_list = WordListFromText(biop->org->orgname->lineage);
28366   ValNodeLink (&word_list, WordListFromText(biop->org->taxname));
28367   for (i = 0; s_SpecialLineageWords[i] != NULL; i++) {
28368     ValNodeAddPointer (&word_list, 0, StringSave (s_SpecialLineageWords[i]));
28369   }
28370 
28371   for (ssp = biop->subtype; ssp != NULL; ssp = ssp_next) {
28372     ssp_next = ssp->next;
28373     if (ssp->subtype == SUBSRC_other && DoesTextContainOnlyTheseWords(ssp->name, word_list)) {
28374       if (ssp_prev == NULL) {
28375         biop->subtype = ssp_next;
28376       } else {
28377         ssp_prev->next = ssp_next;
28378       }
28379       ssp->next = NULL;
28380       if (fp != NULL) {
28381         fprintf (fp, "Removed note %s where lineage is %s\n", ssp->name, biop->org->orgname->lineage);
28382       }
28383       ssp = SubSourceFree (ssp);
28384       any_removed = TRUE;
28385     } else {
28386       ssp_prev = ssp;
28387     }
28388   }
28389 
28390   for (mod = biop->org->orgname->mod; mod != NULL; mod = mod_next) {
28391     mod_next = mod->next;
28392     if (mod->subtype == ORGMOD_other && DoesTextContainOnlyTheseWords(mod->subname, word_list)) {
28393       if (mod_prev == NULL) {
28394         biop->org->orgname->mod = mod_next;
28395       } else {
28396         mod_prev->next = mod_next;
28397       }
28398       mod->next = NULL;
28399       if (fp != NULL) {
28400         fprintf (fp, "Removed note %s where lineage is %s\n", mod->subname, biop->org->orgname->lineage);
28401       }
28402       mod = OrgModFree (mod);
28403       any_removed = TRUE;
28404     } else {
28405       mod_prev = mod;
28406     }
28407   }
28408   word_list = ValNodeFreeData (word_list);
28409   return any_removed;
28410 }
28411 
28412 
/* BioSource visitor: removes lineage notes from biop and, when data points
 * to a LogInfoData, logs to its fp and sets data_in_log if anything was
 * removed.  data may be NULL, in which case nothing is logged.
 */
static void RemoveLineageNotesCallback (BioSourcePtr biop, Pointer data)
{
  LogInfoPtr log_info;
  FILE      *out = NULL;

  if (biop == NULL) {
    return;
  }

  log_info = (LogInfoPtr) data;
  if (log_info != NULL) {
    out = log_info->fp;
  }

  if (!RemoveLineageNoteFromBioSource(biop, out)) {
    return;
  }
  if (log_info != NULL) {
    log_info->data_in_log = TRUE;
  }
}
28428 
28429 
RemoveLineageNotesInSeqEntry(SeqEntryPtr sep,FILE * log_fp)28430 static Boolean RemoveLineageNotesInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
28431 {
28432   LogInfoData lid;
28433 
28434   MemSet (&lid, 0, sizeof (LogInfoData));
28435   lid.fp = log_fp;
28436 
28437   VisitBioSourcesInSep (sep, &lid, RemoveLineageNotesCallback);
28438   return lid.data_in_log;
28439 }
28440 
28441 
/* Callback payload that carries logging state together with an opaque
 * pointer to the macro action being applied.
 */
typedef struct logandpointer {
  LogInfoData lid;   /* log file and "something was logged" flag */
  Pointer action;    /* action object; each callback casts it to its own action type */
} LogAndPointerData, PNTR LogAndPointerPtr;
28446 
28447 
GeneXrefMatchesSuppression(GeneRefPtr grp,Uint2 suppression)28448 static Boolean GeneXrefMatchesSuppression (GeneRefPtr grp, Uint2 suppression)
28449 {
28450   Boolean rval = FALSE;
28451 
28452   if (grp == NULL) {
28453     return FALSE;
28454   }
28455 
28456   switch (suppression) {
28457     case Gene_xref_suppression_type_any:
28458       rval = TRUE;
28459       break;
28460     case Gene_xref_suppression_type_suppressing:
28461       if (SeqMgrGeneIsSuppressed(grp)) {
28462         rval = TRUE;
28463       }
28464       break;
28465     case Gene_xref_suppression_type_non_suppressing:
28466       if (!SeqMgrGeneIsSuppressed(grp)) {
28467         rval = TRUE;
28468       }
28469       break;
28470   }
28471   return rval;
28472 }
28473 
28474 
GeneXrefMatchesNecessary(SeqFeatPtr sfp,GeneRefPtr grp,Uint2 necessary)28475 static Boolean GeneXrefMatchesNecessary (SeqFeatPtr sfp, GeneRefPtr grp, Uint2 necessary)
28476 {
28477   Boolean rval = FALSE;
28478 
28479   if (sfp == NULL || grp == NULL) {
28480     return FALSE;
28481   }
28482 
28483   switch (necessary) {
28484     case Gene_xref_necessary_type_any:
28485       rval = TRUE;
28486       break;
28487     case Gene_xref_necessary_type_necessary:
28488       if (!SeqMgrGeneIsSuppressed (grp) && !IsGeneXrefRedundant (sfp)) {
28489         rval = TRUE;
28490       }
28491       break;
28492     case Gene_xref_necessary_type_unnecessary:
28493       if (!SeqMgrGeneIsSuppressed (grp) && IsGeneXrefRedundant (sfp)) {
28494         rval = TRUE;
28495       }
28496       break;
28497   }
28498   return rval;
28499 }
28500 
28501 
RemoveXref(SeqFeatPtr sfp,Uint2 choice,Pointer data)28502 static Boolean RemoveXref (SeqFeatPtr sfp, Uint2 choice, Pointer data)
28503 {
28504   SeqFeatXrefPtr  xref, xref_next, xref_prev = NULL;
28505   Boolean         removed = FALSE;
28506 
28507   if (sfp == NULL) return FALSE;
28508   for (xref = sfp->xref; xref != NULL; xref = xref_next) {
28509     xref_next = xref->next;
28510     if ((xref->data.choice == choice || choice == 0)
28511       && (xref->data.value.ptrvalue == data || data == NULL)) {
28512       if (xref_prev == NULL) {
28513         sfp->xref = xref_next;
28514       } else {
28515         xref_prev->next = xref_next;
28516       }
28517       xref->next = NULL;
28518       xref = SeqFeatXrefFree (xref);
28519       removed = TRUE;
28520     } else {
28521       xref_prev = xref;
28522     }
28523   }
28524   return removed;
28525 }
28526 
28527 
/* Feature visitor for the remove-xrefs macro action.
 *
 * data must point to a LogAndPointerData whose action is a
 * RemoveXrefsActionPtr.  Only gene xrefs are handled: when the feature
 * satisfies the action's constraint and its gene xref passes the feature
 * type, suppression and necessity filters, the xref is removed, the log
 * flag is set and, if a log file is present, a line describing the feature
 * is written.
 */
static void MacroRemoveXrefsCallback(SeqFeatPtr sfp, Pointer data)
{
  LogAndPointerPtr     ctx;
  RemoveXrefsActionPtr rule;
  GeneXrefTypePtr      gene_rule;
  GeneRefPtr           gene_ref;
  CharPtr              desc;
  ValNode              item;

  ctx = (LogAndPointerPtr) data;
  if (sfp == NULL || ctx == NULL) {
    return;
  }
  rule = (RemoveXrefsActionPtr) ctx->action;
  if (rule == NULL || rule->xref_type == NULL) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rule->constraint)) {
    return;
  }

  /* gene xrefs are the only kind handled */
  if (rule->xref_type->choice != XrefType_gene) {
    return;
  }

  gene_ref = SeqMgrGetGeneXref (sfp);
  if (gene_ref == NULL) {
    return;
  }
  gene_rule = (GeneXrefTypePtr) rule->xref_type->data.ptrvalue;
  if (gene_rule == NULL) {
    return;
  }

  if (gene_rule->feature != Macro_feature_type_any
      && gene_rule->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) {
    return;
  }
  if (!GeneXrefMatchesSuppression(gene_ref, gene_rule->suppression)) {
    return;
  }
  if (!GeneXrefMatchesNecessary(sfp, gene_ref, gene_rule->necessary)) {
    return;
  }

  if (!RemoveXref(sfp, SEQFEAT_GENE, gene_ref)) {
    return;
  }

  ctx->lid.data_in_log = TRUE;
  if (ctx->lid.fp == NULL) {
    return;
  }

  /* wrap the feature in a temporary ValNode to obtain its description */
  MemSet (&item, 0, sizeof (ValNode));
  item.choice = OBJ_SEQFEAT;
  item.data.ptrvalue = sfp;
  desc = GetDiscrepancyItemText (&item);
  fprintf (ctx->lid.fp, "Removed Gene xref from %s\n", desc);
  desc = MemFree (desc);
}
28572 
28573 
MacroRemoveXrefs(SeqEntryPtr sep,RemoveXrefsActionPtr action,FILE * log_fp)28574 static Boolean MacroRemoveXrefs (SeqEntryPtr sep, RemoveXrefsActionPtr action, FILE *log_fp)
28575 {
28576   LogAndPointerData ld;
28577 
28578   MemSet (&ld.lid, 0, sizeof (LogAndPointerData));
28579   ld.lid.fp = log_fp;
28580   ld.action = action;
28581 
28582   VisitFeaturesInSep (sep, &ld, MacroRemoveXrefsCallback);
28583   return ld.lid.data_in_log;
28584 }
28585 
/* Feature visitor for the make-gene-xref macro action.
 *
 * data must point to a LogAndPointerData whose action is a
 * MakeGeneXrefActionPtr.  For a non-gene feature of the requested type that
 * satisfies the action's constraint and has no gene xref yet, a copy of the
 * overlapping gene's GeneRef is attached as a new xref at the head of the
 * feature's xref list.  The log flag is set and, if a log file is present,
 * a line describing the feature is written.
 *
 * Nothing is added when the GeneRef cannot be copied or the xref cannot be
 * allocated.
 */
static void MacroMakeGeneXrefsCallback(SeqFeatPtr sfp, Pointer data)
{
  LogAndPointerPtr lp;
  MakeGeneXrefActionPtr action;
  SeqFeatPtr        gene;
  GeneRefPtr        grp;
  CharPtr           text;
  ValNode           vn;
  SeqMgrFeatContext context;
  SeqFeatXrefPtr    xref;

  if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || (lp = (LogAndPointerPtr)data) == NULL
      || (action = (MakeGeneXrefActionPtr) lp->action) == NULL) {
    return;
  }

  if (action->feature != Macro_feature_type_any && action->feature != GetFeatureTypeFromFeatdef(sfp->idx.subtype)) {
    return;
  }

  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, action->constraint)) {
    return;
  }

  /* features that already carry a gene xref are left alone */
  grp = SeqMgrGetGeneXref (sfp);
  if (grp != NULL) {
    return;
  }

  gene = SeqMgrGetOverlappingGene (sfp->location, &context);
  if (gene == NULL || (grp = (GeneRefPtr) gene->data.value.ptrvalue) == NULL) {
    return;
  }

  /* the xref owns its own copy of the gene reference */
  grp = (GeneRefPtr) AsnIoMemCopy (grp, (AsnReadFunc)GeneRefAsnRead, (AsnWriteFunc)GeneRefAsnWrite);
  if (grp == NULL) {
    /* copy failed: do not attach an xref without gene data */
    return;
  }
  xref = SeqFeatXrefNew ();
  if (xref == NULL) {
    grp = GeneRefFree (grp);
    return;
  }
  xref->data.choice = SEQFEAT_GENE;
  xref->data.value.ptrvalue = grp;
  xref->next = sfp->xref;
  sfp->xref = xref;
  lp->lid.data_in_log = TRUE;
  if (lp->lid.fp != NULL) {
    /* wrap the feature in a temporary ValNode to obtain its description */
    MemSet (&vn, 0, sizeof (ValNode));
    vn.choice = OBJ_SEQFEAT;
    vn.data.ptrvalue = sfp;
    text = GetDiscrepancyItemText (&vn);
    fprintf (lp->lid.fp, "Added Gene xref to %s\n", text);
    text = MemFree (text);
  }
}
28635 
28636 
MacroMakeGeneXrefs(SeqEntryPtr sep,MakeGeneXrefActionPtr action,FILE * log_fp)28637 static Boolean MacroMakeGeneXrefs (SeqEntryPtr sep, MakeGeneXrefActionPtr action, FILE *log_fp)
28638 {
28639   LogAndPointerData ld;
28640 
28641   MemSet (&ld.lid, 0, sizeof (LogAndPointerData));
28642   ld.lid.fp = log_fp;
28643   ld.action = action;
28644 
28645   VisitFeaturesInSep (sep, &ld, MacroMakeGeneXrefsCallback);
28646   return ld.lid.data_in_log;
28647 }
28648 
28649 
MacroMakeBoldXrefs(SeqEntryPtr sep,FILE * log_fp)28650 static Boolean MacroMakeBoldXrefs (SeqEntryPtr sep, FILE *log_fp)
28651 {
28652   Int4 num_created = 0;
28653 
28654   VisitBioseqsInSep (sep, &num_created, ApplyBarcodeDbxrefsToBioseq);
28655 
28656   if (num_created > 0) {
28657     if (log_fp != NULL) {
28658       fprintf (log_fp, "Created %d BARCODE dbxrefs\n", num_created);
28659     }
28660     return TRUE;
28661   } else {
28662     return FALSE;
28663   }
28664 }
28665 
28666 
StripSuffixFromAuthor(AuthorPtr pAuthor)28667 NLM_EXTERN Boolean StripSuffixFromAuthor (AuthorPtr pAuthor)
28668 {
28669   NameStdPtr pNameStandard;
28670   Boolean rval = FALSE;
28671 
28672   if (pAuthor == NULL)
28673     return FALSE;
28674   else if(pAuthor->name->choice != 2)
28675     return FALSE;
28676   pNameStandard = pAuthor->name->data;
28677   if (pNameStandard != NULL && pNameStandard->names[5] != NULL)
28678   {
28679     pNameStandard->names[5][0] = 0;
28680     rval = TRUE;
28681   }
28682   return rval;
28683 }
28684 
TruncateAuthorMiddleInitials(AuthorPtr pAuthor)28685 NLM_EXTERN Boolean TruncateAuthorMiddleInitials (AuthorPtr pAuthor)
28686 {
28687   NameStdPtr pNameStandard;
28688   CharPtr cp;
28689   Boolean rval = FALSE;
28690 
28691   if (pAuthor == NULL)
28692     return FALSE;
28693   else if(pAuthor->name->choice != 2)
28694     return FALSE;
28695   pNameStandard = pAuthor->name->data;
28696   if (pNameStandard != NULL)
28697   {
28698     cp = StringChr (pNameStandard->names[4], '.');
28699     if (cp == NULL || StringChr (cp + 1, '.') == NULL) {
28700       if (StringLen (pNameStandard->names[4]) > 3)
28701       {
28702         pNameStandard->names[4][3] = 0;
28703         pNameStandard->names[4][2] = '.';
28704         rval = TRUE;
28705       }
28706     } else if (StringLen (pNameStandard->names[4]) > 4) {
28707       pNameStandard->names[4][4] = 0;
28708       pNameStandard->names[4][3] = '.';
28709       rval = TRUE;
28710     }
28711   }
28712   return rval;
28713 }
28714 
28715 
MoveAuthorMiddleToFirst(AuthorPtr pAuthor)28716 static Boolean MoveAuthorMiddleToFirst (AuthorPtr pAuthor)
28717 {
28718   NameStdPtr pNameStandard;
28719   CharPtr cp;
28720   Int4 num_letters = 0;
28721   Boolean rval = FALSE;
28722 
28723   if (pAuthor == NULL)
28724     return FALSE;
28725   else if(pAuthor->name->choice != 2)
28726     return FALSE;
28727   pNameStandard = pAuthor->name->data;
28728   if (pNameStandard != NULL)
28729   {
28730     cp = StringChr (pNameStandard->names[4], '.');
28731     if (cp != NULL) {
28732       cp++;
28733       while (isalpha(*(cp + num_letters))) {
28734         num_letters++;
28735       }
28736       if (num_letters > 1) {
28737         SetStringValue (&(pNameStandard->names[1]), cp, ExistingTextOption_append_space);
28738         *cp = 0;
28739         rval = TRUE;
28740       }
28741     }
28742   }
28743   return rval;
28744 }
28745 
28746 
/* Display names for author-fix actions.  The code in this file indexes the
 * table with (fix_type - 1), so entry 0 belongs to fix type 1.
 */
const CharPtr s_AuthorFixActionNames[] = {
  "Truncate middle initials",
  "Strip author suffix",
  "Move middle name to first name"
};
28752 
28753 
SummarizeAuthorFixAction(AuthorFixActionPtr a)28754 NLM_EXTERN CharPtr SummarizeAuthorFixAction (AuthorFixActionPtr a)
28755 {
28756   CharPtr rval = NULL;
28757   CharPtr constraint;
28758 
28759   if (a == NULL) {
28760     return StringSave("Unknown action");
28761   }
28762 
28763   if (a->fix_type < 1 || a->fix_type > sizeof (s_AuthorFixActionNames) / sizeof (CharPtr)) {
28764     return StringSave("Unknown action");
28765   }
28766 
28767   constraint = SummarizeConstraintSet (a->constraint);
28768   if (constraint == NULL) {
28769     rval = StringSave (s_AuthorFixActionNames[a->fix_type - 1]);
28770   } else {
28771     rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (s_AuthorFixActionNames[a->fix_type - 1]) + StringLen (constraint) + 2));
28772     StringCpy (rval, s_AuthorFixActionNames[a->fix_type - 1]);
28773     StringCat (rval, " ");
28774     StringCat (rval, constraint);
28775     constraint = MemFree (constraint);
28776   }
28777   return rval;
28778 }
28779 
28780 
/* State shared with the GetPubsForAuthorFix* callbacks while collecting
 * publications.
 */
typedef struct pubcollect {
  ValNodePtr list;         /* collected pub descriptors (OBJ_SEQDESC) and pub features (OBJ_SEQFEAT) */
  ValNodePtr constraint;   /* constraint set each object must satisfy to be collected */
} PubCollectData, PNTR PubCollectPtr;
28785 
/* Descriptor visitor: appends sdp to the collector's list when it is a pub
 * descriptor that satisfies the collector's constraint.  data must point to
 * a PubCollectData.
 */
static void GetPubsForAuthorFixDesc (SeqDescPtr sdp, Pointer data)
{
  PubCollectPtr collector;

  if (sdp == NULL || sdp->choice != Seq_descr_pub) {
    return;
  }
  collector = (PubCollectPtr) data;
  if (collector == NULL) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQDESC, sdp, collector->constraint)) {
    return;
  }

  ValNodeAddPointer (&(collector->list), OBJ_SEQDESC, sdp);
}
28798 
/* Feature visitor: appends sfp to the collector's list when it is a pub
 * feature that satisfies the collector's constraint.  data must point to a
 * PubCollectData.
 */
static void GetPubsForAuthorFixFeat (SeqFeatPtr sfp, Pointer data)
{
  PubCollectPtr collector;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB) {
    return;
  }
  collector = (PubCollectPtr) data;
  if (collector == NULL) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collector->constraint)) {
    return;
  }

  ValNodeAddPointer (&(collector->list), OBJ_SEQFEAT, sfp);
}
28811 
ApplyAuthorFixToSeqEntry(SeqEntryPtr sep,AuthorFixActionPtr action,FILE * log_fp)28812 static Boolean ApplyAuthorFixToSeqEntry (SeqEntryPtr sep, AuthorFixActionPtr action, FILE *log_fp)
28813 {
28814   PubCollectData  p;
28815   ValNodePtr      vnp, pub;
28816   PubdescPtr      pdp;
28817   SeqFeatPtr      sfp;
28818   SeqDescPtr      sdp;
28819   AuthListPtr     alp;
28820   ValNodePtr      names;
28821   AuthorPtr       ap;
28822   SeqSubmitPtr    ssp;
28823   SubmitBlockPtr  sbp;
28824   ContactInfoPtr  cip;
28825   CitSubPtr       csp;
28826   Int4            num_changed = 0;
28827 
28828   if (sep == NULL || action == NULL) {
28829     return FALSE;
28830   }
28831 
28832   MemSet (&p, 0, sizeof (PubCollectData));
28833   p.constraint = action->constraint;
28834   VisitDescriptorsInSep (sep, &p, GetPubsForAuthorFixDesc);
28835   VisitFeaturesInSep (sep, &p, GetPubsForAuthorFixFeat);
28836   for (vnp = p.list; vnp != NULL; vnp = vnp->next) {
28837     pdp = NULL;
28838     if (vnp->choice == OBJ_SEQFEAT) {
28839       sfp = (SeqFeatPtr) vnp->data.ptrvalue;
28840       if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
28841         pdp = sfp->data.value.ptrvalue;
28842       }
28843     } else if (vnp->choice == OBJ_SEQDESC) {
28844       sdp = (SeqDescPtr) vnp->data.ptrvalue;
28845       if (sdp != NULL && sdp->choice == Seq_descr_pub) {
28846         pdp = sdp->data.ptrvalue;
28847       }
28848     }
28849     if (pdp != NULL) {
28850       for (pub = pdp->pub; pub != NULL; pub = pub->next) {
28851         alp = GetAuthorListForPub (pub);
28852         if (alp != NULL && alp->choice == 1) {
28853           for (names = alp->names; names != NULL; names = names->next) {
28854             ap = names->data.ptrvalue;
28855             switch (action->fix_type) {
28856               case Author_fix_type_truncate_middle_initials:
28857                 if (TruncateAuthorMiddleInitials(ap)) {
28858                   num_changed++;
28859                 }
28860                 break;
28861               case Author_fix_type_strip_suffix:
28862                 if (StripSuffixFromAuthor(ap)) {
28863                   num_changed++;
28864                 }
28865                 break;
28866               case Author_fix_type_move_middle_to_first:
28867                 if (MoveAuthorMiddleToFirst (ap)) {
28868                   num_changed++;
28869                 }
28870                 break;
28871             }
28872           }
28873         }
28874       }
28875     }
28876   }
28877   ssp = FindSeqSubmitForSeqEntry (sep);
28878   if (ssp != NULL) {
28879     sbp = ssp->sub;
28880     if (sbp != NULL) {
28881       csp = sbp->cit;
28882       if (csp != NULL) {
28883         alp = csp->authors;
28884         if (alp != NULL && alp->choice == 1) {
28885           for (names = alp->names; names != NULL; names = names->next) {
28886             ap = names->data.ptrvalue;
28887             switch (action->fix_type) {
28888               case Author_fix_type_truncate_middle_initials:
28889                 if (TruncateAuthorMiddleInitials(ap)) {
28890                   num_changed++;
28891                 }
28892                 break;
28893               case Author_fix_type_strip_suffix:
28894                 if (StripSuffixFromAuthor(ap)) {
28895                   num_changed++;
28896                 }
28897                 break;
28898               case Author_fix_type_move_middle_to_first:
28899                 if (MoveAuthorMiddleToFirst (ap)) {
28900                   num_changed++;
28901                 }
28902                 break;
28903             }
28904           }
28905         }
28906       }
28907       cip = sbp->contact;
28908       if (cip != NULL) {
28909         ap = cip->contact;
28910         if (ap != NULL) {
28911           /*
28912           switch (action->fix_type) {
28913             case Author_fix_type_truncate_middle_initials:
28914               if (TruncateAuthorMiddleInitials(ap)) {
28915                 num_changed++;
28916               }
28917               break;
28918             case Author_fix_type_strip_suffix:
28919               if (StripSuffixFromAuthor(ap)) {
28920                 num_changed++;
28921               }
28922               break;
28923             case Author_fix_type_move_middle_to_first:
28924               if (MoveAuthorMiddleToFirst (ap)) {
28925                 num_changed++;
28926               }
28927               break;
28928           }
28929           */
28930         }
28931       }
28932     }
28933   }
28934 
28935   p.list = ValNodeFree (p.list);
28936   if (num_changed > 0) {
28937     if (log_fp != NULL) {
28938       fprintf (log_fp, "%s for %d names\n", s_AuthorFixActionNames[action->fix_type - 1], num_changed);
28939     }
28940     return TRUE;
28941   } else {
28942     return FALSE;
28943   }
28944 }
28945 
28946 
/* UpdateSequencesInSeqEntry
 *
 * Reads nucleotide FASTA sequences from a->filename, pairs them with the
 * Bioseqs found in sep, and replaces each paired original sequence with its
 * update.
 *
 * sep        - entry whose sequences are to be updated
 * a          - action; a->filename names the FASTA file, a->add_cit_subs
 *              requests a Cit-sub on every sequence that is reported updated
 * log_fp     - optional log stream (may be NULL)
 * align_func - alignment function handed to AlignForSequenceUpdate
 *
 * Returns TRUE if at least one pair was reported as updated, FALSE otherwise
 * (including when the file cannot be opened or read).
 */
static Boolean UpdateSequencesInSeqEntry (SeqEntryPtr sep, UpdateSequencesActionPtr a, FILE *log_fp, GlobalAlignFunc align_func)
{
  FILE *fp;
  SeqEntryPtr   update_sequences;
  SeqEntryPtr   update_sep, orig_scope;
  ValNodePtr    err_msg_list = NULL, vnp;
  Boolean       chars_stripped = FALSE;
  Int4          orig_seq_num = 0, update_seq_num = 0;
  ValNodePtr    orig_list = NULL, update_list = NULL;
  ValNodePtr    unmatched_updates;
  BioseqSetPtr  top_bssp;
  Uint2         update_entityID;
  ValNodePtr    vnp_o, vnp_u;
  SeqAlignPtr   salp = NULL;
  Boolean       revcomp, data_in_log;
  BioseqPtr     update_bsp, orig_bsp;
  Boolean       rval = FALSE;
  Char          id_buf[255];

  if (sep == NULL || a == NULL || StringHasNoText (a->filename)) {
    return FALSE;
  }

  /* load the update sequences from the FASTA file */
  fp = FileOpen (a->filename, "r");
  if (fp == NULL)
  {
    if (log_fp != NULL)
    {
      fprintf (log_fp, "Unable to open %s for update sequences\n", a->filename);
    }
    return FALSE;
  }
  update_sequences = ImportNucleotideFASTASequencesFromFile (fp, TRUE, NULL, &err_msg_list,
                                                             &chars_stripped, TRUE);
  FileClose (fp);
  /* import error messages are discarded without being logged */
  ValNodeFreeData (err_msg_list);
  /* NOTE(review): called before the NULL check below, so this helper is
   * presumably NULL-safe - confirm */
  AddUniqueUpdateSequenceIDs (update_sequences);
  if (update_sequences == NULL)
  {
    if (log_fp != NULL)
    {
      fprintf (log_fp, "Unable to read FASTA update sequences from %s\n", a->filename);
    }
    return FALSE;
  }

  if (chars_stripped && log_fp != NULL)
  {
    fprintf (log_fp, "Characters were stripped from FASTA update sequences in %s\n", a->filename);
  }

  /* wrap the imported sequences in a temporary genbank-class set so they
   * can be given an entity ID and item IDs */
  top_bssp = BioseqSetNew ();
  top_bssp->_class = BioseqseqSet_class_genbank;
  top_bssp->seq_set = update_sequences;
  update_sep = SeqEntryNew ();
  update_sep->choice = 2;
  update_sep->data.ptrvalue = top_bssp;
  update_entityID = ObjMgrGetEntityIDForPointer (top_bssp);
  AssignIDsInEntityEx (update_entityID, 0, NULL, NULL);

  /* build parallel lists of original and update Bioseqs; the matching is
   * done with the scope set to sep, then the previous scope is restored */
  ListBioseqsInSeqEntry (sep, TRUE, &orig_seq_num, &orig_list);
  ListBioseqsInSeqEntry (update_sep, TRUE, &update_seq_num, &update_list);
  orig_scope = SeqEntrySetScope (sep);
  unmatched_updates = ShuffleUpdateBioseqListWithIndex (&update_list, orig_list);
  SeqEntrySetScope (orig_scope);
  if (log_fp != NULL && unmatched_updates != NULL)
  {
    /* placeholder: the loop body is empty, nothing is logged yet */
    for (vnp = unmatched_updates; vnp != NULL; vnp = vnp->next)
    {
      /* TODO - log unmatched update sequences? */
    }
  }

  RemoveSequencesWithoutUpdates (&orig_list, &update_list);

  /* walk both lists in lock step; entry i of orig_list is updated from
   * entry i of update_list */
  for (vnp_o = orig_list, vnp_u = update_list;
       vnp_o != NULL && vnp_u != NULL;
       vnp_o = vnp_o->next, vnp_u = vnp_u->next)
  {
    orig_bsp = vnp_o->data.ptrvalue;
    update_bsp = vnp_u->data.ptrvalue;
    revcomp = FALSE;
    salp = AlignForSequenceUpdate (orig_bsp, update_bsp, &revcomp, align_func);
    /* TODO - warn about no alignment? */

    /* NOTE(review): salp is passed on without a NULL check - confirm that
     * ReplaceOneSequence accepts a NULL alignment */
    ReplaceOneSequence (salp, orig_bsp, update_bsp);
    if (revcomp)
    {
      BioseqRevComp (orig_bsp);
      SeqEntryExplore (sep, (Pointer) orig_bsp, RevCompFeats);
    }
    /* only pairs whose residues differ at this point are reported as
     * updated and get the Cit-sub / quality-score handling */
    if (! AreSequenceResiduesIdentical (orig_bsp, update_bsp))
    {
      if (a->add_cit_subs)
      {
        AddCitSubToUpdatedSequence (orig_bsp, orig_bsp->idx.entityID, kSubmitterUpdateText);
      }
      /* data_in_log is only an out-parameter here; it is never read */
      RemoveQualityScores (orig_bsp, log_fp, &data_in_log);
      if (log_fp != NULL)
      {
        SeqIdWrite (SeqIdFindWorst (orig_bsp->id), id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
        fprintf (log_fp, "Updated sequence %s.\n", id_buf);
      }
      rval = TRUE;
    }
    salp = SeqAlignFree (salp);
  }

  /* dispose of the temporary set holding the update sequences */
  /* NOTE(review): orig_list, update_list, unmatched_updates and the
   * update_sep wrapper node are not freed in this function - confirm
   * whether they are owned elsewhere or leaked */
  top_bssp->idx.deleteme = TRUE;
  DeleteMarkedObjects (update_entityID, 0, NULL);
  return rval;
}
29059 
/* AddTransSplicingToGene
 *
 * Feature-visitor callback.  For a gene feature whose location has two or
 * more intervals, sets the exception flag and adds "trans-splicing" to the
 * exception text (appended after a comma when text is already present).
 * Genes whose exception flag is set and whose text already mentions
 * trans-splicing (case-insensitive) are left alone.
 *
 * sfp      - feature being visited; anything other than a gene is ignored
 * userdata - optional Int4 counter, incremented for every gene changed
 */
static void AddTransSplicingToGene (SeqFeatPtr sfp, Pointer userdata)

{
  Int4Ptr    countP;
  Int4       numivals = 0;
  SeqLocPtr  slp = NULL;
  CharPtr    str;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
  countP = (Int4Ptr) userdata;

  if (sfp->excpt) {
    if (StringISearch (sfp->except_text, "trans-splicing") != NULL) return;
  }

  /* count the intervals in the gene location */
  while ((slp = SeqLocFindNext (sfp->location, slp)) != NULL) {
    numivals++;
  }
  if (numivals < 2) return;

  sfp->excpt = TRUE;
  if (sfp->except_text == NULL) {
    sfp->except_text = StringSave ("trans-splicing");
  } else {
    /* Size the buffer from the length of the existing text, not from the
     * size of the pointer; the extra 20 bytes cover ",trans-splicing" plus
     * the terminating NUL. */
    str = (CharPtr) MemNew (sizeof (Char) * (StringLen (sfp->except_text) + 20));
    if (str != NULL) {
      sprintf (str, "%s,trans-splicing", sfp->except_text);
      MemFree (sfp->except_text);
      sfp->except_text = str;
    }
  }

  if (countP != NULL) {
    (*countP)++;
  }
}
29096 
/* Set-visitor callback: raises the Boolean flag passed through userdata
 * when bssp is a small-genome set.  The flag is never cleared here. */
static void LookForBioseqSetFields (BioseqSetPtr bssp, Pointer userdata)

{
  BoolPtr  found_flag = (BoolPtr) userdata;

  if (bssp != NULL
      && bssp->_class == BioseqseqSet_class_small_genome_set
      && found_flag != NULL) {
    *found_flag = TRUE;
  }
}
29107 
AddTransSplicingInSeqEntry(SeqEntryPtr sep,FILE * log_fp)29108 static Boolean AddTransSplicingInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
29109 {
29110   Int4     count = 0;
29111   Boolean  is_small_genome_set = FALSE;
29112 
29113   if (sep == NULL) return FALSE;
29114 
29115   VisitSetsInSep (sep, (Pointer) &is_small_genome_set, LookForBioseqSetFields);
29116   if (! is_small_genome_set) return FALSE;
29117 
29118   VisitFeaturesInSep (sep, (Pointer) &count, AddTransSplicingToGene);
29119 
29120   return (Boolean) (count > 0);
29121 }
29122 
RemoveInvalidECnumbersInSeqEntry(SeqEntryPtr sep,FILE * log_fp)29123 static Boolean RemoveInvalidECnumbersInSeqEntry (SeqEntryPtr sep, FILE *log_fp)
29124 {
29125   Int4  count = 0;
29126 
29127   if (sep == NULL) return FALSE;
29128 
29129   count += UpdateReplacedECNumbers (sep);
29130   count += DeleteBadECNumbers (sep);
29131 
29132   return (Boolean) (count > 0);
29133 }
29134 
29135 
/* State passed to CreateTSAIDsFromDeflineCallback through the Bioseq
 * visitor's user-data pointer. */
typedef struct tsaidfromdefline {
  TextPortionPtr text_portion; /* selects the part of the title used as the ID tag */
  CharPtr suffix;              /* when non-NULL, appended to the tag after a '.' */
  Int4    num_created;         /* incremented once per ID added */
} TSAIdFromDeflineData, PNTR TSAIdFromDeflinePtr;
29141 
29142 
/* Bioseq-visitor callback: builds a general Seq-id for a nucleotide Bioseq
 * from its title descriptor and puts it at the front of the Bioseq's ID list.
 *
 * The db comes from GetTSAIDDB; the tag is the portion of the title chosen
 * by the text_portion in data, followed by ".<suffix>" when a suffix was
 * supplied.  Nothing is done for proteins, for Bioseqs without a db or
 * title, or when no text can be extracted from the title.
 */
static void CreateTSAIDsFromDeflineCallback (BioseqPtr bsp, Pointer data)
{
  TSAIdFromDeflinePtr  info = (TSAIdFromDeflinePtr) data;
  SeqMgrDescContext    title_context;
  SeqDescrPtr          title_desc;
  CharPtr              db_name;
  CharPtr              id_text = NULL;
  DbtagPtr             new_tag;
  SeqIdPtr             new_id;

  if (bsp == NULL || ISA_aa (bsp->mol) || info == NULL) {
    return;
  }
  db_name = GetTSAIDDB (bsp);
  if (db_name == NULL) {
    return;
  }

  /* take the ID text from the first title descriptor */
  title_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &title_context);
  if (title_desc != NULL) {
    id_text = GetTextPortionFromString ((CharPtr) title_desc->data.ptrvalue, info->text_portion);
  }
  if (id_text == NULL) {
    db_name = MemFree (db_name);
    return;
  }

  /* the new Dbtag takes ownership of db_name */
  new_tag = DbtagNew ();
  new_tag->db = db_name;
  new_tag->tag = ObjectIdNew ();
  if (info->suffix != NULL) {
    /* id + '.' + suffix + NUL */
    new_tag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_text) + StringLen (info->suffix) + 2));
    sprintf (new_tag->tag->str, "%s.%s", id_text, info->suffix);
    id_text = MemFree (id_text);
  } else {
    /* no suffix: the extracted text itself becomes the tag */
    new_tag->tag->str = id_text;
    id_text = NULL;
  }

  /* prepend the new ID and refresh the Bioseq index */
  new_id = ValNodeNew (NULL);
  new_id->choice = SEQID_GENERAL;
  new_id->data.ptrvalue = new_tag;
  new_id->next = bsp->id;
  bsp->id = new_id;
  SeqMgrReplaceInBioseqIndex (bsp);
  info->num_created ++;
}
29185 
29186 
CreateTSAIDsFromDeflineInSep(SeqEntryPtr sep,CharPtr suffix,TextPortionPtr t)29187 NLM_EXTERN Int4 CreateTSAIDsFromDeflineInSep (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr t)
29188 {
29189   TSAIdFromDeflineData td;
29190 
29191   td.text_portion = t;
29192   td.suffix = suffix;
29193   td.num_created = 0;
29194 
29195   VisitBioseqsInSep (sep, &td, CreateTSAIDsFromDeflineCallback);
29196 
29197   return td.num_created;
29198 }
29199 
29200 
/* State passed to MakeTSAIdsCallback and EditTSAIdsCallback through the
 * Bioseq visitor's user-data pointer. */
typedef struct tsa_id_callback {
  SeqEntryPtr top_sep;         /* entry handed to ReplaceSeqIdWithSeqId */
  CharPtr suffix;              /* when non-NULL, appended to the new tag after a '.' */
  TextPortionPtr text_portion; /* when non-NULL, selects the part of the old ID to keep */
  Int4    num_created;         /* incremented once per ID replaced */
} TSAIdCallbackData, PNTR TSAIdCallbackPtr;
29207 
29208 
/* MakeTSAIdsCallback
 *
 * Bioseq-visitor callback.  For a nucleotide TSA Bioseq, builds a new
 * general Seq-id (db from GetTSAIDDB) and substitutes it for an existing ID
 * via ReplaceSeqIdWithSeqId.
 *
 * The source ID is an existing general ID whose db starts with "gpid:" or
 * "bpid:" if there is one, otherwise the local ID.  The new tag is the
 * source tag (optionally reduced by t->text_portion; for a gpid/bpid source
 * without a text portion, everything from the last '.' on is dropped),
 * followed by ".<suffix>" when t->suffix is non-NULL.
 *
 * bsp  - Bioseq being visited
 * data - TSAIdCallbackPtr; num_created is incremented on each replacement
 *
 * The Dbtag is only allocated once it is certain that an ID will be built,
 * and db is released on every early exit, so nothing is leaked when the
 * callback gives up part-way through.
 */
static void MakeTSAIdsCallback (BioseqPtr bsp, Pointer data)
{
  SeqIdPtr        sip, sip_tsa = NULL, sip_local = NULL;
  TSAIdCallbackPtr t;
  SeqIdPtr        sip_new = NULL;
  DbtagPtr        dbtag, dbtag_old = NULL;
  CharPtr         db = NULL;
  ObjectIdPtr     oip = NULL;
  Int4            id_num = 0;
  CharPtr         id_str = NULL, tmp_str = NULL;
  CharPtr         cp;

  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL || !IsTSA (bsp)) {
    return;
  }

  t = (TSAIdCallbackPtr) data;

  /* remember the (last) local ID and the (last) gpid:/bpid: general ID */
  for (sip = bsp->id;
       sip != NULL;
       sip = sip->next)
  {
    if (sip->choice == SEQID_LOCAL)
    {
      sip_local = sip;
    }
    else if (sip->choice == SEQID_GENERAL
               && (dbtag_old = (DbtagPtr) sip->data.ptrvalue) != NULL
               && (StringNCmp (dbtag_old->db, "gpid:", 5) == 0 || StringNCmp (dbtag_old->db, "bpid:", 5) == 0))
    {
      sip_tsa = sip;
    }
  }

  if (sip_tsa == NULL && sip_local == NULL) {
    return;
  }
  db = GetTSAIDDB (bsp);
  if (db == NULL) {
    return;
  }

  /* prefer the existing gpid:/bpid: ID over the local ID as the source */
  if (sip_tsa != NULL && (dbtag_old = (DbtagPtr) sip_tsa->data.ptrvalue) != NULL) {
    oip = dbtag_old->tag;
    sip = sip_tsa;
  } else if (sip_local != NULL) {
    oip = sip_local->data.ptrvalue;
    sip = sip_local;
  }

  if (oip == NULL) {
    /* db is not yet owned by a Dbtag, release it here */
    db = MemFree (db);
    return;
  }

  /* derive the new tag value (string or number) from the source tag */
  if (oip->str == NULL) {
    if (t->text_portion != NULL) {
      tmp_str = (CharPtr) MemNew (sizeof (Char) * 16);
      sprintf (tmp_str, "%d", oip->id);
      id_str = GetTextPortionFromString (tmp_str, t->text_portion);
      tmp_str = MemFree (tmp_str);
    } else {
      id_num = oip->id;
    }
  } else {
    if (t->text_portion == NULL) {
      id_str = StringSave (oip->str);
      /* drop the old suffix (from the last '.') of an existing TSA ID */
      if (sip == sip_tsa && (cp = StringRChr (id_str, '.')) != NULL) {
        *cp = 0;
      }
    } else {
      id_str = GetTextPortionFromString (oip->str, t->text_portion);
    }
  }

  if (id_num == 0 && id_str == NULL && StringHasNoText (t->suffix)) {
    /* nothing to build an ID from; id_str is NULL here, only db is held */
    db = MemFree (db);
    return;
  }

  /* from here on the Dbtag owns db */
  dbtag = DbtagNew ();
  dbtag->db = db;
  dbtag->tag = ObjectIdNew ();

  if (t->suffix == NULL) {
    if (id_str == NULL) {
      dbtag->tag->id = id_num;
    } else {
      dbtag->tag->str = StringSave (id_str);
    }
  } else {
    if (id_str == NULL) {
      dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (16 + StringLen (t->suffix)));
      sprintf (dbtag->tag->str, "%d.%s", id_num, t->suffix);
    } else {
      dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_str) + StringLen (t->suffix) + 2));
      sprintf (dbtag->tag->str, "%s.%s", id_str, t->suffix);
    }
  }
  id_str = MemFree (id_str);
  sip_new = ValNodeNew (NULL);
  sip_new->choice = SEQID_GENERAL;
  sip_new->data.ptrvalue = dbtag;
  /* work on a copy of the old ID: the one in bsp->id is about to be replaced */
  sip = SeqIdDup (sip);
  ReplaceSeqIdWithSeqId (sip, sip_new, t->top_sep);
  sip = SeqIdFree (sip);
  sip_new = SeqIdFree (sip_new);
  t->num_created++;
}
29316 
29317 
ConvertLocalIdsToTSAIds(SeqEntryPtr sep,CharPtr suffix,TextPortionPtr tp)29318 NLM_EXTERN Int4 ConvertLocalIdsToTSAIds (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr tp)
29319 {
29320   TSAIdCallbackData t;
29321 
29322   t.top_sep = sep;
29323   t.suffix = suffix;
29324   t.text_portion = tp;
29325   t.num_created = 0;
29326   VisitBioseqsInSep (sep, &t, MakeTSAIdsCallback);
29327   return t.num_created;
29328 }
29329 
29330 
/* EditTSAIdsCallback
 *
 * Bioseq-visitor callback.  For a nucleotide TSA Bioseq that already has a
 * general Seq-id whose db starts with "gpid:" or "bpid:", builds a
 * replacement ID with the same db and an edited tag, and substitutes it via
 * ReplaceSeqIdWithSeqId.
 *
 * The new tag is the old tag (reduced by t->text_portion when given,
 * otherwise with everything from the last '.' on removed), followed by
 * ".<suffix>" when t->suffix is non-NULL.
 *
 * bsp  - Bioseq being visited
 * data - TSAIdCallbackPtr; num_created is incremented on each replacement
 */
static void EditTSAIdsCallback (BioseqPtr bsp, Pointer data)
{
  SeqIdPtr        sip, sip_tsa = NULL;
  TSAIdCallbackPtr t;
  SeqIdPtr        sip_new = NULL;
  DbtagPtr        dbtag, dbtag_old = NULL;
  ObjectIdPtr     oip = NULL;
  Int4            id_num = 0;
  CharPtr         id_str = NULL, tmp_str = NULL;
  CharPtr         cp;

  if (bsp == NULL || ISA_aa (bsp->mol) || data == NULL || !IsTSA (bsp)) {
    return;
  }

  t = (TSAIdCallbackPtr) data;

  /* stop at the first gpid:/bpid: general ID */
  for (sip = bsp->id;
       sip != NULL && sip_tsa == NULL;
       sip = sip->next)
  {
    if (sip->choice == SEQID_GENERAL
               && (dbtag_old = (DbtagPtr) sip->data.ptrvalue) != NULL
               && (StringNCmp (dbtag_old->db, "gpid:", 5) == 0 || StringNCmp (dbtag_old->db, "bpid:", 5) == 0))
    {
      sip_tsa = sip;
    }
  }

  if (sip_tsa == NULL) {
    return;
  }

  /* the loop stops right after the match, so this is the Dbtag the loop
   * left in dbtag_old; re-read it to make the dependency explicit */
  dbtag_old = (DbtagPtr) sip_tsa->data.ptrvalue;
  oip = dbtag_old->tag;
  if (oip == NULL) {
    /* a Dbtag without a tag gives nothing to edit */
    return;
  }

  if (oip->str == NULL) {
    if (t->text_portion != NULL) {
      tmp_str = (CharPtr) MemNew (sizeof (Char) * 16);
      sprintf (tmp_str, "%d", oip->id);
      id_str = GetTextPortionFromString (tmp_str, t->text_portion);
      tmp_str = MemFree (tmp_str);
    } else {
      id_num = oip->id;
    }
  } else {
    if (t->text_portion == NULL) {
      id_str = StringSave (oip->str);
      /* drop the old suffix (from the last '.') */
      if ((cp = StringRChr (id_str, '.')) != NULL) {
        *cp = 0;
      }
    } else {
      id_str = GetTextPortionFromString (oip->str, t->text_portion);
    }
  }

  if (id_num == 0 && id_str == NULL && StringHasNoText (t->suffix)) {
    return;
  }

  dbtag = DbtagNew ();
  dbtag->db = StringSave (dbtag_old->db);
  dbtag->tag = ObjectIdNew ();

  if (t->suffix == NULL) {
    if (id_str == NULL) {
      dbtag->tag->id = id_num;
    } else {
      dbtag->tag->str = StringSave (id_str);
    }
  } else {
    if (id_str == NULL) {
      dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (16 + StringLen (t->suffix)));
      sprintf (dbtag->tag->str, "%d.%s", id_num, t->suffix);
    } else {
      /* size from id_str, the string actually written; oip->str is NULL
       * when the old tag was numeric */
      dbtag->tag->str = (CharPtr) MemNew (sizeof (Char) * (StringLen (id_str) + StringLen (t->suffix) + 2));
      sprintf (dbtag->tag->str, "%s.%s", id_str, t->suffix);
    }
  }
  id_str = MemFree (id_str);
  /* work on a copy of the old ID: the one in bsp->id is about to be replaced */
  sip_tsa = SeqIdDup (sip_tsa);
  sip_new = ValNodeNew (NULL);
  sip_new->choice = SEQID_GENERAL;
  sip_new->data.ptrvalue = dbtag;
  ReplaceSeqIdWithSeqId (sip_tsa, sip_new, t->top_sep);
  sip_new = SeqIdFree (sip_new);
  sip_tsa = SeqIdFree (sip_tsa);
  t->num_created++;
}
29419 
29420 
EditTSAIds(SeqEntryPtr sep,CharPtr suffix,TextPortionPtr tp)29421 NLM_EXTERN Int4 EditTSAIds (SeqEntryPtr sep, CharPtr suffix, TextPortionPtr tp)
29422 {
29423   TSAIdCallbackData t;
29424 
29425   t.top_sep = sep;
29426   t.suffix = suffix;
29427   t.text_portion = tp;
29428   t.num_created = 0;
29429   VisitBioseqsInSep (sep, &t, EditTSAIdsCallback);
29430   return t.num_created;
29431 }
29432 
29433 
29434 static Boolean
CreateTsaIDsInSeqEntry(SeqEntryPtr sep,CreateTSAIdsActionPtr action,FILE * log_fp)29435 CreateTsaIDsInSeqEntry
29436 (SeqEntryPtr           sep,
29437  CreateTSAIdsActionPtr action,
29438  FILE *                log_fp)
29439 {
29440   Int4 num_created = 0;
29441 
29442   if (sep == NULL || action == NULL || action->src == NULL) {
29443     return FALSE;
29444   }
29445 
29446   switch (action->src->choice) {
29447     case CreateTSAIdsSrc_local_id:
29448       num_created = ConvertLocalIdsToTSAIds (sep, action->suffix, action->id_text_portion);
29449       break;
29450     case CreateTSAIdsSrc_defline:
29451       num_created = CreateTSAIDsFromDeflineInSep (sep, action->suffix, action->src->data.ptrvalue);
29452       break;
29453   }
29454 
29455   if (num_created > 0) {
29456     if (log_fp != NULL) {
29457       fprintf (log_fp, "Created %d TSA IDs\n", num_created);
29458     }
29459     return TRUE;
29460   } else {
29461     return FALSE;
29462   }
29463 }
29464 
29465 
PerformAutofixInSeqEntry(SeqEntryPtr sep,AutofixActionPtr action,FILE * log_fp)29466 static Boolean PerformAutofixInSeqEntry
29467 (SeqEntryPtr      sep,
29468  AutofixActionPtr action,
29469  FILE *           log_fp)
29470 {
29471   Int4 num_created = 0;
29472   DiscrepancyType test_type;
29473   DiscrepancyConfigPtr dcp;
29474   LogInfoData          lid;
29475   Int4                 i;
29476   ValNodePtr           results;
29477   ValNodePtr           sep_list = NULL;
29478 
29479   if (sep == NULL || action == NULL || (test_type = GetDiscrepancyTypeFromSettingName (action->test_name)) == MAX_DISC_TYPE) {
29480     return FALSE;
29481   }
29482 
29483   dcp = DiscrepancyConfigNew();
29484   for (i = 0; i < MAX_DISC_TYPE; i++)
29485   {
29486     dcp->conf_list[i] = FALSE;
29487   }
29488   dcp->conf_list[test_type] = TRUE;
29489 
29490   ValNodeAddPointer (&sep_list, 0, sep);
29491   results = CollectDiscrepancies (dcp, sep_list, NULL);
29492   sep_list = ValNodeFree (sep_list);
29493   dcp = DiscrepancyConfigFree (dcp);
29494 
29495   MemSet (&lid, 0, sizeof (LogInfoData));
29496   lid.fp = log_fp;
29497   AutofixDiscrepancies (results, TRUE, &lid);
29498   results = FreeClickableList (results);
29499   return lid.data_in_log;
29500 }
29501 
29502 
/* Accumulator used by TaxnameConsistencyBiosourceCallback while visiting
 * the BioSources of a set. */
typedef struct taxnameconsistencydata {
  CharPtr taxname;    /* taxname of the first BioSource visited; points into that BioSource, not a copy */
  Boolean first;      /* TRUE until the first BioSource has been visited */
  Boolean consistent; /* set to FALSE once a differing taxname is seen */
} TaxnameConsistencyData, PNTR TaxnameConsistencyPtr;
29508 
29509 
/* BioSource-visitor callback: compares the taxname of biop with the one
 * recorded from the first BioSource visited and clears the consistent flag
 * on a mismatch.  A BioSource with no taxname text only counts as a mismatch
 * when a taxname has already been recorded. */
static void TaxnameConsistencyBiosourceCallback (BioSourcePtr biop, Pointer data)
{
  TaxnameConsistencyPtr  state = (TaxnameConsistencyPtr) data;
  CharPtr                this_name = NULL;

  if (biop == NULL || state == NULL) {
    return;
  }

  /* this_name stays NULL when the source has no usable taxname */
  if (biop->org != NULL && !StringHasNoText (biop->org->taxname)) {
    this_name = biop->org->taxname;
  }

  if (state->first) {
    /* first source: record its name, if it has one */
    state->first = FALSE;
    if (this_name != NULL) {
      state->taxname = this_name;
    }
    return;
  }

  if (this_name == NULL) {
    if (state->taxname != NULL) {
      state->consistent = FALSE;
    }
  } else if (StringCmp (state->taxname, this_name) != 0) {
    state->consistent = FALSE;
  }
}
29529 
29530 
AreTaxnamesConsistent(BioseqSetPtr bssp)29531 static Boolean AreTaxnamesConsistent (BioseqSetPtr bssp)
29532 {
29533   TaxnameConsistencyData td;
29534 
29535   td.taxname = NULL;
29536   td.first = TRUE;
29537   td.consistent = TRUE;
29538 
29539   VisitBioSourcesInSet (bssp, &td, TaxnameConsistencyBiosourceCallback);
29540   return td.consistent;
29541 }
29542 
29543 
FixPopToPhySets(SeqEntryPtr sep)29544 static Int4 FixPopToPhySets (SeqEntryPtr sep)
29545 {
29546   BioseqSetPtr bssp;
29547   Int4 rval = 0;
29548 
29549   if (sep == NULL
29550       || !IS_Bioseq_set (sep)
29551       || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
29552     return 0;
29553   }
29554 
29555   if (bssp->_class == BioseqseqSet_class_pop_set && !AreTaxnamesConsistent(bssp)) {
29556     bssp->_class = BioseqseqSet_class_phy_set;
29557     rval++;
29558   }
29559 
29560   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
29561     rval += FixPopToPhySets(sep);
29562   }
29563   return rval;
29564 }
29565 
29566 
PerformFixSetsInSeqEntry(SeqEntryPtr sep,FixSetsActionPtr action,FILE * log_fp)29567 static Boolean PerformFixSetsInSeqEntry
29568 (SeqEntryPtr      sep,
29569  FixSetsActionPtr action,
29570  FILE *           log_fp)
29571 {
29572   Int4                 num_renormalized;
29573   Boolean              rval = FALSE;
29574 
29575   if (sep == NULL || action == NULL) {
29576     return FALSE;
29577   }
29578 
29579   switch (action->choice) {
29580     case FixSetsAction_remove_single_item_set:
29581       num_renormalized = RemoveSingleItemSet (sep, TRUE);
29582       if (num_renormalized > 0) {
29583         if (log_fp != NULL) {
29584           fprintf (log_fp, "Removed %d wrapper sets\n", num_renormalized);
29585         }
29586         rval = TRUE;
29587       }
29588       break;
29589     case FixSetsAction_renormalize_nuc_prot_sets:
29590       num_renormalized = RenormalizeNucProtSets (sep, TRUE);
29591       if (num_renormalized > 0) {
29592         if (log_fp != NULL) {
29593           fprintf (log_fp, "Renormalized %d sets\n", num_renormalized);
29594         }
29595         rval = TRUE;
29596       }
29597       break;
29598     case FixSetsAction_fix_pop_to_phy:
29599       num_renormalized = FixPopToPhySets (sep);
29600       if (num_renormalized > 0) {
29601         if (log_fp != NULL) {
29602           fprintf (log_fp, "Converted %d sets\n", num_renormalized);
29603         }
29604         rval = TRUE;
29605       }
29606       break;
29607   }
29608 
29609 
29610   return rval;
29611 }
29612 
29613 
/* PerformApplyTableInSeqEntry
 *
 * Applies the values of a tab-delimited table to objects in sep.
 *
 * The table is action->in_memory_table when that is set, otherwise it is
 * read from action->filename.  The first row is the header: the first column
 * is the match column (its type comes from action->match_type) and each
 * remaining header cell must be recognised by FieldTypeFromString.
 *
 * sep    - entry to apply the table to
 * action - table source, match type and the also_change_mrna / skip_blanks
 *          options
 * log_fp - optional log stream (may be NULL)
 *
 * Returns TRUE when the table was applied, FALSE when it could not be loaded,
 * a header was not recognised, or rows collide on the same destination.
 * A table read from file is freed before returning; an in-memory table is
 * left untouched.
 */
static Boolean PerformApplyTableInSeqEntry
(SeqEntryPtr      sep,
 ApplyTableActionPtr action,
 FILE *           log_fp)
{
  Boolean            rval = FALSE;
  ValNodePtr         table, data_table;
  FILE              *fp;
  ValNodePtr         err_list = NULL;
  ValNodePtr         val, vnp, obj_table = NULL, dup_dest_errs;
  ValNodePtr         columns = NULL, dup_col_list;
  TabColumnConfigPtr t;

  if (action == NULL) {
    return FALSE;
  }
  if (action->in_memory_table != NULL && action->in_memory_table->data.ptrvalue == NULL) {
    if (log_fp != NULL) {
      fprintf (log_fp, "In memory table missing from apply table action.\n");
    }
    return FALSE;
  }

  /* obtain the table: from file when no in-memory table was supplied */
  if (action->in_memory_table == NULL) {
    if (StringHasNoText (action->filename)) {
      return FALSE;
    }
    fp = FileOpen (action->filename, "r");
    if (fp == NULL)
    {
      if (log_fp != NULL) {
        fprintf (log_fp, "Unable to open %s\n", action->filename);
      }
      return FALSE;
    }
    table = ReadTabTableFromFile (fp);
    FileClose (fp);

    if (table == NULL) {
      if (log_fp != NULL) {
        fprintf (log_fp, "Unable to read table from %s\n", action->filename);
      }
      return FALSE;
    }
  } else {
    table = (ValNodePtr) action->in_memory_table->data.ptrvalue;
  }

  /* NOTE(review): the messages below print action->filename even when the
   * table came from memory - confirm filename is non-NULL in that case */
  if (table->next == NULL) {
    if (log_fp != NULL) {
      fprintf (log_fp, "Table must have at least two rows, one header and one data, unable to apply table from %s\n", action->filename);
    }
    if (action->in_memory_table == NULL) {
      table = FreeTabTable (table);
    }
    return FALSE;
  }

  data_table = table;

  /* configuration for the first (match) column */
  t = TabColumnConfigNew ();
  t->match_type = MatchTypeFromTableMatchType (action->match_type);

  if (t->match_type == NULL) {
    if (log_fp != NULL) {
      fprintf (log_fp, "No match type for table, unable to apply table from %s\n", action->filename);
    }
    t = TabColumnConfigFree (t);
    if (action->in_memory_table == NULL) {
      table = FreeTabTable (table);
    }
    return FALSE;
  } else if (t->match_type->choice == eTableMatchAny) {
    /* "match any" requires exactly one header row and one data row */
    if (table->next->next != NULL) {
      if (log_fp != NULL) {
        fprintf (log_fp, "Table must only have two rows for Match All Rows option, unable to apply table from %s\n", action->filename);
      }
      t = TabColumnConfigFree (t);
      if (action->in_memory_table == NULL) {
        table = FreeTabTable (table);
      }
      return FALSE;
    } else {
      /* only in this mode does data_table start at the second row */
      data_table = table->next;
    }
  }
  ValNodeAddPointer (&columns, 0, t);

  /* walk the header row and build one column configuration per cell; the
   * loop stops at the first header that is not a known qualifier name */
  rval = TRUE;
  for (val = table->data.ptrvalue, vnp = columns;
       val != NULL && rval;
       val = val->next, vnp = vnp->next) {
    if (vnp == NULL) {
      vnp = ValNodeNew (columns);
    }
    t = vnp->data.ptrvalue;
    if (t == NULL) {
      t = TabColumnConfigNew ();
      vnp->data.ptrvalue = t;
    }
    /* the match column already has match_type set and is skipped here */
    if (t->match_type == NULL && t->field == NULL) {
      t->field = FieldTypeFromString (val->data.ptrvalue);
      if (t->field == NULL) {
        t = TabColumnConfigFree (t);
        vnp->data.ptrvalue = NULL;
        rval = FALSE;
        if (log_fp != NULL) {
          fprintf (log_fp, "%s not recognized as qualifier name, unable to apply table from %s\n",
                   (CharPtr) val->data.ptrvalue, action->filename);
        }
      } else {
        if (IsFieldTypeCDSProduct(t->field)) {
          t->match_mrna = action->also_change_mrna;
        }
        t->skip_blank = action->skip_blanks;
      }
    }
  }
  if (rval) {
    dup_col_list = CheckForDuplicateColumns (columns);
    if (dup_col_list != NULL) {
      FixDuplicateColumns (columns);
      dup_col_list = ValNodeFreeData(dup_col_list);
    }

    /* value validation is only run when there is a log to report to */
    if (log_fp != NULL) {
      err_list = ValidateTabTableValues (data_table, columns);
      for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
        fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
      }
      err_list = ValNodeFreeData (err_list);
    }

    obj_table = GetObjectTableForTabTable (sep, data_table, columns, &err_list);

    /* refuse to apply when two rows would write to the same object */
    dup_dest_errs = CheckObjTableForRowsThatApplyToTheSameDestination (obj_table);
    if (dup_dest_errs != NULL) {
      if (log_fp != NULL) {
        for (vnp = dup_dest_errs; vnp != NULL; vnp = vnp->next) {
          fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
        }
        fprintf (log_fp, "For one or more columns, two or more rows in the table apply to the same object.  Cannot apply table.");
      }
      dup_dest_errs = ValNodeFreeData (dup_dest_errs);
      rval = FALSE;
    } else {
      ValNodeLink (&err_list, CheckObjTableForExistingText (sep, data_table, columns, obj_table));

      /* messages are logged in two passes: choice 1 first, then choice 0 */
      /* look for errors with choice 1 */
      for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == 1) {
          if (log_fp != NULL) {
            fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
          }
        }
      }
      /* look for errors with choice 0 */
       for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == 0) {
          if (log_fp != NULL) {
            fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
          }
        }
      }

      err_list = ValNodeFreeData (err_list);

      /* NOTE(review): rval is still TRUE on every path that reaches this
       * point, so the !rval branch below cannot run as written */
      if (!rval) {
        DeleteMarkedObjects (SeqMgrGetEntityIDForSeqEntry (sep), 0, NULL);
      } else {
        err_list =  ApplyTableValuesToObjectTable (sep, data_table, columns, obj_table);
        if (log_fp != NULL) {
          for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
            fprintf (log_fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
          }
        }
      }
    }
    obj_table = FreeObjectTableForTabTable (obj_table);
  }
  /* common cleanup; the table is only freed when it was read from file */
  err_list = ValNodeFreeData (err_list);
  if (action->in_memory_table == NULL) {
    table = FreeTabTable (table);
  }
  columns = TabColumnConfigListFree (columns);

  return rval;
}
29802 
29803 
/* State passed to AddFileDescriptorsCallback through the Bioseq visitor's
 * user-data pointer. */
typedef struct addfiledescriptorsdata {
  SeqDescPtr sdp_list;     /* descriptors to copy onto each matching Bioseq */
  ValNodePtr constraint;   /* constraint set a Bioseq must match */
  Int4       num_affected; /* incremented once per Bioseq that received a copy */
} AddFileDescriptorsData, PNTR AddFileDescriptorsPtr;
29809 
29810 
/* Bioseq-visitor callback: appends a copy of the descriptor list in data to
 * every nucleotide Bioseq that matches the constraint.  When the Bioseq's
 * parent is a nuc-prot set, the copy is attached to that set instead of the
 * Bioseq itself. */
static void AddFileDescriptorsCallback (BioseqPtr bsp, Pointer data)
{
  AddFileDescriptorsPtr  info = (AddFileDescriptorsPtr) data;
  BioseqSetPtr           parent_set = NULL;
  Boolean                attach_to_set;

  if (bsp == NULL || info == NULL || ISA_aa (bsp->mol)) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, info->constraint)) {
    return;
  }

  if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
    parent_set = (BioseqSetPtr) bsp->idx.parentptr;
  }
  attach_to_set = (Boolean) (parent_set != NULL && parent_set->_class == BioseqseqSet_class_nuc_prot);

  /* each destination receives its own copy of the list */
  if (attach_to_set) {
    ValNodeLink (&(parent_set->descr), AsnIoMemCopy (info->sdp_list, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite));
  } else {
    ValNodeLink (&(bsp->descr), AsnIoMemCopy (info->sdp_list, (AsnReadFunc) SeqDescrAsnRead, (AsnWriteFunc) SeqDescrAsnWrite));
  }
  info->num_affected++;
}
29834 
29835 
AddFileDescriptors(SeqEntryPtr sep,AddDescriptorListActionPtr action,FILE * log_fp)29836 static Boolean AddFileDescriptors
29837 (SeqEntryPtr      sep,
29838  AddDescriptorListActionPtr action,
29839  FILE *           log_fp)
29840 {
29841   Boolean            rval = FALSE;
29842   SeqDescPtr         sdp, sdp_next;
29843   AsnIoPtr           aip;
29844   AddFileDescriptorsData a;
29845 
29846   if (action == NULL || action->descriptor_list == NULL) {
29847     return FALSE;
29848   }
29849   if (action->descriptor_list->in_memory_table != NULL && action->descriptor_list->in_memory_table->data.ptrvalue == NULL) {
29850     if (log_fp != NULL) {
29851       fprintf (log_fp, "In memory table missing from add file descriptors action.\n");
29852     }
29853     return FALSE;
29854   }
29855   MemSet (&a, 0, sizeof (AddFileDescriptorsData));
29856   a.constraint = action->constraint;
29857 
29858   if (action->descriptor_list->in_memory_table == NULL) {
29859     if (StringHasNoText (action->descriptor_list->filename)) {
29860       return FALSE;
29861     }
29862     aip = AsnIoOpen (action->descriptor_list->filename, "r");
29863     if (aip == NULL)
29864     {
29865       if (log_fp != NULL) {
29866         fprintf (log_fp, "Unable to open %s\n", action->descriptor_list->filename);
29867       }
29868       return FALSE;
29869     }
29870 
29871     while (sdp = SeqDescAsnRead (aip, NULL)) {
29872       ValNodeLink (&(a.sdp_list), sdp);
29873     }
29874     AsnIoClose (aip);
29875 
29876     if (a.sdp_list == NULL) {
29877       if (log_fp != NULL) {
29878         fprintf (log_fp, "Unable to read table from %s\n", action->descriptor_list->filename);
29879       }
29880       return FALSE;
29881     }
29882   } else {
29883     a.sdp_list = (ValNodePtr) action->descriptor_list->in_memory_table->data.ptrvalue;
29884   }
29885 
29886   VisitBioseqsInSep (sep, &a, AddFileDescriptorsCallback);
29887   if (a.num_affected > 0) {
29888     rval = TRUE;
29889     if (log_fp != NULL) {
29890       fprintf (log_fp, "Applied descriptors from %s to %d bioseqs\n", action->descriptor_list->filename, a.num_affected);
29891     }
29892   }
29893 
29894   if (action->descriptor_list->in_memory_table == NULL) {
29895     for (sdp = a.sdp_list; sdp != NULL; sdp = sdp_next) {
29896       sdp_next = sdp->next;
29897       sdp->next = NULL;
29898       sdp = SeqDescFree (sdp);
29899     }
29900   }
29901 
29902   return rval;
29903 }
29904 
29905 
/* Descriptor-visitor callback: for each user descriptor of type
 * "StructuredComment" calls AutoapplyStructuredCommentPrefix and, when that
 * returns a prefix, adds the prefix to the ValNode list addressed by data. */
static void AutoApplyStructuredCommentPrefixesCallback (SeqDescPtr sdp, Pointer data)
{
  UserObjectPtr  user_obj;
  CharPtr        applied_prefix;

  if (data == NULL || sdp == NULL || sdp->choice != Seq_descr_user) {
    return;
  }
  user_obj = (UserObjectPtr) sdp->data.ptrvalue;
  if (user_obj == NULL || user_obj->type == NULL) {
    return;
  }
  if (StringICmp (user_obj->type->str, "StructuredComment") != 0) {
    return;
  }

  applied_prefix = AutoapplyStructuredCommentPrefix (user_obj);
  if (applied_prefix != NULL) {
    ValNodeAddPointer ((ValNodePtr PNTR) data, 0, applied_prefix);
  }
}
29919 
29920 
AutoApplyStructuredCommentPrefixes(SeqEntryPtr sep,FILE * log_fp)29921 static Boolean AutoApplyStructuredCommentPrefixes (SeqEntryPtr sep, FILE *log_fp)
29922 {
29923   ValNodePtr added = NULL, vnp;
29924   Int4 count = 0;
29925   CharPtr curr_prefix = NULL;
29926 
29927   VisitDescriptorsInSep (sep, &added, AutoApplyStructuredCommentPrefixesCallback);
29928   if (added == NULL) {
29929     return FALSE;
29930   }
29931   if (log_fp != NULL) {
29932     added = ValNodeSort (added, SortVnpByString);
29933     curr_prefix = added->data.ptrvalue;
29934     count = 1;
29935     for (vnp = added->next; vnp != NULL; vnp = vnp->next) {
29936       if (StringCmp (curr_prefix, vnp->data.ptrvalue) == 0) {
29937         count++;
29938       } else {
29939         fprintf (log_fp, "Added %d %s structured comment prefix%s\n", count, curr_prefix, count == 1 ? "" : "es");
29940         curr_prefix = vnp->data.ptrvalue;
29941         count = 1;
29942       }
29943     }
29944     fprintf (log_fp, "Added %d %s structured comment prefix%s\n", count, curr_prefix, count == 1 ? "" : "es");
29945   }
29946   added = ValNodeFree (added);
29947   return TRUE;
29948 }
29949 
29950 
/* State handed to PerformRemoveSequencesInSeqEntryCallback while Bioseqs are
 * visited for a remove-sequences action.
 */
typedef struct performremovesequencesdata{
  RemoveSequencesActionPtr action; /* action whose constraint is tested against each Bioseq */
  Boolean any;                     /* set to TRUE by the callback once a Bioseq matched */
  FILE *log_fp;                    /* optional log; a "Removed <id>" line is written per match when non-NULL */
} PerformRemoveSequencesData, PNTR PerformRemoveSequencesPtr;
29956 
/* Bioseq visitor for the remove-sequences action.
 *
 * userdata must point to a PerformRemoveSequencesData.  A Bioseq matching the
 * action's constraint is flagged for deletion: for a non-protein Bioseq whose
 * best top parent is a Bioseq-set with a non-NULL set pointer, the set is
 * flagged instead of the Bioseq itself.  The match is recorded in ->any and,
 * when a log file is present, logged with the Bioseq's best ID.
 */
static void PerformRemoveSequencesInSeqEntryCallback (BioseqPtr bsp, Pointer userdata)
{
  PerformRemoveSequencesPtr ctx = (PerformRemoveSequencesPtr) userdata;
  SeqEntryPtr               top_sep;
  BioseqSetPtr              parent_set = NULL;
  Char                      label[PATH_MAX];

  if (ctx == NULL || ctx->action == NULL || ctx->action->constraint == NULL) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_BIOSEQ, bsp, ctx->action->constraint)) {
    return;
  }

  /* only non-protein sequences may take their enclosing set with them */
  if (!ISA_aa (bsp->mol)) {
    top_sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
    if (top_sep != NULL && IS_Bioseq_set (top_sep)) {
      parent_set = (BioseqSetPtr) top_sep->data.ptrvalue;
    }
  }

  if (parent_set != NULL) {
    parent_set->idx.deleteme = TRUE;
  } else {
    bsp->idx.deleteme = TRUE;
  }
  ctx->any = TRUE;

  if (ctx->log_fp != NULL) {
    SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), label, PRINTID_REPORT, sizeof (label) - 1);
    fprintf (ctx->log_fp, "Removed %s\n", label);
  }
}
29984 
29985 
PerformRemoveSequencesInSeqEntry(SeqEntryPtr sep,RemoveSequencesActionPtr action,FILE * log_fp)29986 static Boolean PerformRemoveSequencesInSeqEntry
29987 (SeqEntryPtr      sep,
29988  RemoveSequencesActionPtr action,
29989  FILE *           log_fp)
29990 {
29991   PerformRemoveSequencesData prd;
29992 
29993   if (action == NULL || action->constraint == NULL) {
29994     return FALSE;
29995   }
29996 
29997   prd.action = action;
29998   prd.any = FALSE;
29999   prd.log_fp = log_fp;
30000 
30001   VisitBioseqsInSep (sep, &prd, PerformRemoveSequencesInSeqEntryCallback);
30002   if (prd.any) {
30003     DeleteMarkedObjects (SeqMgrGetEntityIDForSeqEntry(sep), 0, NULL);
30004   }
30005 
30006   return prd.any;
30007 }
30008 
30009 
/* One record per Bioseq collected by FindSeqTechBsp; the records are sorted
 * and consumed by PerformPropagateSequenceTechnology.
 */
typedef struct propagateseqtech {
  BioseqPtr bsp;       /* the Bioseq this record describes */
  UserObjectPtr uop;   /* its "##Assembly-Data-START##" structured comment, or NULL when it has none */
  CharPtr filename;    /* allocated copy of the NCBIFILE tag string, truncated by FindSeqTechBsp; freed by the consumer */
} PropagateSeqTechData, PNTR PropagateSeqTechPtr;
30015 
30016 
IsStructuredCommentWithPrefix(UserObjectPtr uop,CharPtr prefix)30017 static Boolean IsStructuredCommentWithPrefix (UserObjectPtr uop, CharPtr prefix)
30018 {
30019   UserFieldPtr ufp;
30020 
30021   if (uop == NULL || uop->type == NULL || StringICmp (uop->type->str, "StructuredComment") != 0) {
30022     return FALSE;
30023   }
30024 
30025   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
30026     if (ufp->label != NULL && StringICmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
30027       if (ufp->choice != 1 || StringCmp (ufp->data.ptrvalue, prefix) != 0) {
30028         return FALSE;
30029       } else {
30030         return TRUE;
30031       }
30032     }
30033   }
30034   return FALSE;
30035 }
30036 
30037 
TruncateAtLocalId(SeqIdPtr sip_local,CharPtr filename)30038 static Boolean TruncateAtLocalId (SeqIdPtr sip_local, CharPtr filename)
30039 {
30040   Char   id_buf[20];
30041   CharPtr cmp;
30042   ObjectIdPtr oip;
30043   Boolean removed_id = FALSE;
30044   Int4    len, f_len;
30045 
30046   if (filename == NULL || sip_local == NULL
30047       || (oip = (ObjectIdPtr) sip_local->data.ptrvalue) == NULL) {
30048     return FALSE;
30049   }
30050   f_len = StringLen (filename);
30051 
30052   if (oip->id > 0) {
30053     sprintf (id_buf, "%d", oip->id);
30054     cmp = id_buf;
30055   } else {
30056     cmp = oip->str;
30057   }
30058   len = StringLen (cmp);
30059   if (f_len > len + 1 && filename[f_len - len - 1] == '/'
30060       && StringCmp (filename + (f_len - len), cmp) == 0) {
30061     filename[f_len - len - 1] = 0;
30062     removed_id = TRUE;
30063   }
30064   return removed_id;
30065 }
30066 
30067 
/* Bioseq visitor that collects PropagateSeqTechData records.
 *
 * data is the address of a ValNode list head (ValNodePtr PNTR).  Protein
 * Bioseqs and Bioseqs without an NCBIFILE general ID are skipped.  For the
 * others a record is appended holding:
 *   - the Bioseq,
 *   - a copy of the NCBIFILE tag string, cut at the trailing "/<local id>"
 *     when TruncateAtLocalId matches, otherwise cut at its last '/',
 *   - the first "##Assembly-Data-START##" structured comment found on the
 *     Bioseq, or NULL.
 * The record and its filename are allocated here and must be freed by the
 * consumer of the list.
 */
static void FindSeqTechBsp (BioseqPtr bsp, Pointer data)
{
  SeqIdPtr sip, sip_local = NULL;
  DbtagPtr dbtag;
  PropagateSeqTechPtr p;
  CharPtr cp;
  SeqDescPtr sdp;
  SeqMgrDescContext context;

  if (bsp == NULL || ISA_aa(bsp->mol) || data == NULL) {
    return;
  }

  p = (PropagateSeqTechPtr) MemNew (sizeof (PropagateSeqTechData));
  if (p == NULL) {
    return;
  }
  p->bsp = bsp;
  /* find NCBIFILE id */
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_GENERAL
        && (dbtag = (DbtagPtr) sip->data.ptrvalue) != NULL
        && (StringICmp (dbtag->db, "NCBIFILE") == 0)) {
      /* the last NCBIFILE id wins; release any earlier copy so it is not leaked */
      p->filename = MemFree (p->filename);
      /* a Dbtag without a tag object yields no filename instead of a NULL dereference */
      if (dbtag->tag != NULL) {
        p->filename = StringSave (dbtag->tag->str);
      }
    } else if (sip->choice == SEQID_LOCAL) {
      sip_local = sip;
    }
  }
  if (p->filename == NULL) {
    p = MemFree (p);
    return;
  }
  if (!TruncateAtLocalId(sip_local, p->filename)) {
    /* no "/<local id>" tail: fall back to dropping the last path component */
    cp = StringRChr (p->filename, '/');
    if (cp != NULL) {
      *cp = 0;
    }
  }

  /* stop at the first Assembly-Data structured comment */
  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
       sdp != NULL && p->uop == NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context)) {
    if (IsStructuredCommentWithPrefix(sdp->data.ptrvalue, "##Assembly-Data-START##")) {
      p->uop = sdp->data.ptrvalue;
    }
  }

  ValNodeAddPointer ((ValNodePtr PNTR) data, 0, p);
}
30114 
30115 
SortVnpByPropagateSeqTech(VoidPtr ptr1,VoidPtr ptr2)30116 static int LIBCALLBACK SortVnpByPropagateSeqTech (VoidPtr ptr1, VoidPtr ptr2)
30117 
30118 {
30119   PropagateSeqTechPtr     str1;
30120   PropagateSeqTechPtr     str2;
30121   ValNodePtr  vnp1;
30122   ValNodePtr  vnp2;
30123   int comp = 0;
30124 
30125   if (ptr1 != NULL && ptr2 != NULL) {
30126     vnp1 = *((ValNodePtr PNTR) ptr1);
30127     vnp2 = *((ValNodePtr PNTR) ptr2);
30128     if (vnp1 != NULL && vnp2 != NULL) {
30129       str1 = (PropagateSeqTechPtr) vnp1->data.ptrvalue;
30130       str2 = (PropagateSeqTechPtr) vnp2->data.ptrvalue;
30131       if (str1 != NULL && str2 != NULL) {
30132         comp = StringCmp (str1->filename, str2->filename);
30133         if (comp == 0) {
30134           if (str1->uop == NULL) {
30135             comp = 1;
30136           } else if (str2->uop == NULL) {
30137             comp = -1;
30138           }
30139         }
30140       }
30141     }
30142   }
30143   return comp;
30144 }
30145 
30146 
PerformPropagateSequenceTechnology(SeqEntryPtr sep,Pointer action,FILE * log_fp)30147 static Boolean PerformPropagateSequenceTechnology
30148 (SeqEntryPtr      sep,
30149  Pointer          action,
30150  FILE *           log_fp)
30151 {
30152   ValNodePtr list = NULL, vnp;
30153   PropagateSeqTechPtr p1, p2;
30154   Boolean rval = FALSE;
30155   SeqDescPtr   sdp;
30156   Int4         num_added = 0;
30157 
30158   VisitBioseqsInSep (sep, &list, FindSeqTechBsp);
30159   list = ValNodeSort (list, SortVnpByPropagateSeqTech);
30160 
30161   if (list == NULL) {
30162     return FALSE;
30163   }
30164   p1 = list->data.ptrvalue;
30165   for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
30166     p2 = vnp->data.ptrvalue;
30167     if (p1->uop == NULL || StringCmp (p1->filename, p2->filename) != 0) {
30168       p1 = p2;
30169     } else if (p2->uop == NULL) {
30170       sdp = CreateNewDescriptorOnBioseq (p2->bsp, Seq_descr_user);
30171       sdp->data.ptrvalue = AsnIoMemCopy (p1->uop, (AsnReadFunc) UserObjectAsnRead, (AsnWriteFunc) UserObjectAsnWrite);
30172       num_added ++;
30173       rval = TRUE;
30174     }
30175   }
30176 
30177   for (vnp = list; vnp != NULL; vnp = vnp->next) {
30178     p1 = vnp->data.ptrvalue;
30179     p1->filename = MemFree (p1->filename);
30180     vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
30181   }
30182   list = ValNodeFree (list);
30183   if (num_added > 0 && log_fp != NULL) {
30184     fprintf (log_fp, "Added %d Assembly Data descriptors.\n", num_added);
30185   }
30186 
30187   return rval;
30188 }
30189 
30190 
/* One row of the EC-number replacement table: SetupECReplacementTable fills
 * it from a line of the data file, split at the first tab.
 */
typedef struct ecrepdata {
  CharPtr  before;  /* text ahead of the first tab: the EC number looked up; allocated copy */
  CharPtr  after;   /* text after the first tab: the replacement text; allocated copy */
} EcRepData, PNTR EcRepPtr;
30195 
30196 
/* Releases an EcRepData record together with its two strings.
 * Accepts NULL.  Always returns NULL so callers can clear their pointer.
 */
static EcRepPtr EcRepFree (EcRepPtr e)
{
  if (e == NULL) {
    return NULL;
  }

  e->before = MemFree (e->before);
  e->after = MemFree (e->after);
  return (EcRepPtr) MemFree (e);
}
30206 
30207 
SortVnpByEcBefore(VoidPtr ptr1,VoidPtr ptr2)30208 static int LIBCALLBACK SortVnpByEcBefore (VoidPtr ptr1, VoidPtr ptr2)
30209 
30210 {
30211   EcRepPtr    erp1, erp2;
30212   CharPtr     str1, str2;
30213   ValNodePtr  vnp1, vnp2;
30214 
30215   if (ptr1 == NULL || ptr2 == NULL) return 0;
30216   vnp1 = *((ValNodePtr PNTR) ptr1);
30217   vnp2 = *((ValNodePtr PNTR) ptr2);
30218   if (vnp1 == NULL || vnp2 == NULL) return 0;
30219   erp1 = (EcRepPtr) vnp1->data.ptrvalue;
30220   erp2 = (EcRepPtr) vnp2->data.ptrvalue;
30221   if (erp1 == NULL || erp2 == NULL) return 0;
30222   str1 = erp1->before;
30223   str2 = erp2->before;
30224   if (str1 == NULL || str2 == NULL) return 0;
30225   return StringCmp (str1, str2);
30226 }
30227 
SetupECReplacementTable(CharPtr file,Int4Ptr len)30228 static EcRepPtr PNTR SetupECReplacementTable (CharPtr file, Int4Ptr len)
30229 
30230 {
30231   EcRepPtr    erp;
30232   FileCache   fc;
30233   FILE        *fp = NULL;
30234   Int4        i;
30235   ValNodePtr  last = NULL;
30236   Char        line [512];
30237   Char        path [PATH_MAX];
30238   CharPtr     ptr;
30239   ErrSev      sev;
30240   CharPtr     str;
30241   ValNodePtr  vnp;
30242   ValNodePtr     ec_rep_list = NULL;
30243   EcRepPtr PNTR  ec_rep_data = NULL;
30244   Int4           ec_rep_len = 0;
30245 
30246   if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
30247     FileBuildPath (path, NULL, file);
30248     sev = ErrSetMessageLevel (SEV_ERROR);
30249     fp = FileOpen (path, "r");
30250     ErrSetMessageLevel (sev);
30251     if (fp != NULL) {
30252       FileCacheSetup (&fc, fp);
30253 
30254       str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
30255       while (str != NULL) {
30256         if (StringDoesHaveText (str)) {
30257           ptr = StringChr (str, '\t');
30258           if (ptr != NULL) {
30259             *ptr = '\0';
30260             ptr++;
30261             erp = (EcRepPtr) MemNew (sizeof (EcRepData));
30262             if (erp != NULL) {
30263               erp->before = StringSave (str);
30264               erp->after = StringSave (ptr);
30265               vnp = ValNodeAddPointer (&last, 0, (Pointer) erp);
30266               if (ec_rep_list == NULL) {
30267                 ec_rep_list = vnp;
30268               }
30269               last = vnp;
30270             }
30271           }
30272         }
30273         str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
30274       }
30275 
30276       FileClose (fp);
30277       ec_rep_len = ValNodeLen (ec_rep_list);
30278       if (ec_rep_len > 0) {
30279         ec_rep_list = ValNodeSort (ec_rep_list, SortVnpByEcBefore);
30280         ec_rep_data = (EcRepPtr PNTR) MemNew (sizeof (EcRepPtr) * (ec_rep_len + 1));
30281         if (ec_rep_data != NULL) {
30282           for (vnp = ec_rep_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
30283             erp = (EcRepPtr) vnp->data.ptrvalue;
30284             ec_rep_data [i] = erp;
30285           }
30286         }
30287       }
30288     }
30289   }
30290   ec_rep_list = ValNodeFree (ec_rep_list);
30291   *len = ec_rep_len;
30292   return ec_rep_data;
30293 }
30294 
30295 
FreeECReplacementTable(EcRepPtr PNTR ec_rep_data,Int4 ec_rep_len)30296 static EcRepPtr PNTR FreeECReplacementTable (EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len)
30297 {
30298   Int4 i;
30299 
30300   if (ec_rep_data == NULL) {
30301     return NULL;
30302   }
30303   for (i = 0; i < ec_rep_len; i++) {
30304     ec_rep_data[i] = EcRepFree(ec_rep_data[i]);
30305   }
30306   ec_rep_data = MemFree (ec_rep_data);
30307   return ec_rep_data;
30308 }
30309 
30310 
/* Binary search of a replacement table sorted by "before".
 *
 * Returns the first record whose "before" string is not less than str, or
 * the last record when every record is less.  The result is therefore only
 * a candidate: the caller must compare its "before" with str to confirm an
 * exact match.
 *
 * Returns NULL when the table is NULL or empty (for example when the data
 * file could not be loaded) instead of indexing outside the array.
 */
static EcRepPtr GetEcReplacementFromTable (CharPtr str, EcRepPtr PNTR ec_rep_data, Int4 ec_rep_len)
{
  Int4     L, R, mid;
  EcRepPtr erp = NULL;

  if (ec_rep_data == NULL || ec_rep_len < 1) {
    return NULL;
  }

  L = 0;
  R = ec_rep_len - 1;
  while (L < R) {
    /* midpoint written so the sum cannot overflow */
    mid = L + (R - L) / 2;
    erp = ec_rep_data [(int) mid];
    if (erp != NULL && StringCmp (erp->before, str) < 0) {
      L = mid + 1;
    } else {
      R = mid;
    }
  }
  erp = ec_rep_data [(int) R];
  return erp;
}
30330 
30331 
/* State shared between ReplaceUpdatedECNumbers and UpdateECCallback. */
typedef struct replaceupdatedec {
  FILE *log_fp;                            /* optional log; one line per changed EC number when non-NULL */
  UpdateReplacedEcNumbersActionPtr action; /* options read by the callback (delete_multiple_replacement, delete_unrecognized, delete_improper_format) */
  EcRepPtr PNTR  ec_rep_data;              /* replacement table returned by SetupECReplacementTable */
  Int4 ec_rep_len;                         /* number of records in ec_rep_data */
  Int4 num_removed;                        /* count of EC values the callback cleared */
  Int4 num_replaced;                       /* count of EC values the callback replaced */
} ReplaceUpdatedECData, PNTR ReplaceUpdatedEcPtr;
30340 
30341 
GetLocusTagFromProtRef(SeqFeatPtr sfp,CharPtr PNTR p_locus_tag)30342 static Boolean GetLocusTagFromProtRef (SeqFeatPtr sfp, CharPtr PNTR p_locus_tag)
30343 
30344 {
30345   BioseqPtr          bsp;
30346   SeqFeatPtr         cds;
30347   SeqMgrFeatContext  fcontext;
30348   SeqFeatPtr         gene;
30349   GeneRefPtr         grp;
30350 
30351   if (sfp == NULL || p_locus_tag == NULL) return FALSE;
30352   grp = SeqMgrGetGeneXref (sfp);
30353   if (grp != NULL) {
30354     if (SeqMgrGeneIsSuppressed (grp)) return FALSE;
30355     if (StringDoesHaveText (grp->locus_tag)) {
30356       *p_locus_tag = StringSave (grp->locus_tag);
30357       return TRUE;
30358     } else if (StringDoesHaveText (grp->locus)) {
30359       *p_locus_tag = StringSave (grp->locus);
30360       return TRUE;
30361     }
30362   }
30363   bsp = BioseqFindFromSeqLoc (sfp->location);
30364   if (bsp == NULL) return FALSE;
30365   cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
30366   if (cds == NULL) return FALSE;
30367   grp = SeqMgrGetGeneXref (cds);
30368   if (grp != NULL) {
30369     if (SeqMgrGeneIsSuppressed (grp)) return FALSE;
30370     if (StringDoesHaveText (grp->locus_tag)) {
30371       *p_locus_tag = StringSave (grp->locus_tag);
30372       return TRUE;
30373     } else if (StringDoesHaveText (grp->locus)) {
30374       *p_locus_tag = StringSave (grp->locus);
30375       return TRUE;
30376     }
30377   }
30378   gene = SeqMgrGetOverlappingGene (cds->location, &fcontext);
30379   if (gene == NULL || gene->data.choice != SEQFEAT_GENE) return FALSE;
30380   grp = (GeneRefPtr) gene->data.value.ptrvalue;
30381   if (grp != NULL) {
30382     if (SeqMgrGeneIsSuppressed (grp)) return FALSE;
30383     if (StringDoesHaveText (grp->locus_tag)) {
30384       *p_locus_tag = StringSave (grp->locus_tag);
30385       return TRUE;
30386     } else if (StringDoesHaveText (grp->locus)) {
30387       *p_locus_tag = StringSave (grp->locus);
30388       return TRUE;
30389     }
30390   }
30391   return FALSE;
30392 }
30393 
30394 
/* Feature visitor for the update-replaced-EC-numbers action.
 *
 * userdata must point to a ReplaceUpdatedECData.  For every non-blank EC
 * value on a protein feature:
 *   - a value failing ValidateECnumber is cleared when the action's
 *     delete_improper_format option is set;
 *   - a valid value with an exact match in the replacement table is replaced
 *     by the table's "after" text when that text contains no tab, or cleared
 *     when it does and delete_multiple_replacement is set;
 *   - whatever value is then present is cleared when delete_unrecognized is
 *     set and ECnumberNotInList reports it.
 * Clearing frees the string and leaves the list node in place with a NULL
 * value.  Each change updates the counters in userdata and is logged,
 * prefixed with the feature's locus tag or "No locus tag", when a log file
 * is present.
 */
static void UpdateECCallback (SeqFeatPtr sfp, Pointer userdata)

{
  ReplaceUpdatedEcPtr ctx;
  ProtRefPtr  prot;
  ValNodePtr  ec_node;
  CharPtr     ec_text;
  CharPtr     locus_tag = NULL;
  CharPtr     tag_label;
  EcRepPtr    match;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
  prot = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (prot == NULL || prot->ec == NULL) return;
  ctx = (ReplaceUpdatedEcPtr) userdata;
  if (ctx == NULL) {
    return;
  }

  GetLocusTagFromProtRef (sfp, &locus_tag);
  tag_label = (locus_tag == NULL) ? "No locus tag" : locus_tag;

  for (ec_node = prot->ec; ec_node != NULL; ec_node = ec_node->next) {
    ec_text = (CharPtr) ec_node->data.ptrvalue;
    if (StringHasNoText (ec_text)) continue;

    if (!ValidateECnumber (ec_text)) {
      /* badly formatted value */
      if (ctx->action->delete_improper_format) {
        if (ctx->log_fp != NULL) {
          fprintf (ctx->log_fp, "%s: removed %s\n", tag_label, ec_text);
        }
        ec_node->data.ptrvalue = MemFree (ec_node->data.ptrvalue);
        ctx->num_removed++;
      }
      continue;
    }

    match = GetEcReplacementFromTable(ec_text, ctx->ec_rep_data, ctx->ec_rep_len);
    if (match != NULL && StringCmp (match->before, ec_text) == 0) {
      if (StringChr (match->after, '\t') == NULL) {
        /* single replacement value */
        if (ctx->log_fp != NULL) {
          fprintf (ctx->log_fp, "%s:replaced %s with %s\n", tag_label, match->before, match->after);
        }
        ec_node->data.ptrvalue = MemFree (ec_node->data.ptrvalue);
        ec_node->data.ptrvalue = StringSave (match->after);
        ctx->num_replaced++;
      } else if (ctx->action->delete_multiple_replacement) {
        /* several candidate replacements: drop the value rather than choose */
        if (ctx->log_fp != NULL) {
          fprintf (ctx->log_fp, "%s: removed %s\n", tag_label, match->before);
        }
        ec_node->data.ptrvalue = MemFree (ec_node->data.ptrvalue);
        ctx->num_removed++;
      }
    }

    /* re-read: the value may have been replaced or cleared just above */
    ec_text = ec_node->data.ptrvalue;
    if (ec_text != NULL && ctx->action->delete_unrecognized && ECnumberNotInList (ec_text)) {
      if (ctx->log_fp != NULL) {
        fprintf (ctx->log_fp, "%s: deleted %s\n", tag_label, ec_text);
      }
      ec_node->data.ptrvalue = MemFree (ec_node->data.ptrvalue);
      ctx->num_removed++;
    }
  }
  locus_tag = MemFree (locus_tag);
}
30455 
30456 
ReplaceUpdatedECNumbers(SeqEntryPtr sep,UpdateReplacedEcNumbersActionPtr action,FILE * log_fp)30457 static Boolean ReplaceUpdatedECNumbers (SeqEntryPtr sep, UpdateReplacedEcNumbersActionPtr action, FILE *log_fp)
30458 {
30459   ReplaceUpdatedECData r;
30460 
30461   MemSet (&r, 0, sizeof (ReplaceUpdatedECData));
30462   r.action = action;
30463   r.log_fp = log_fp;
30464   r.ec_rep_data = SetupECReplacementTable ("ecnum_replaced.txt", &(r.ec_rep_len));
30465 
30466   VisitFeaturesInSep (sep, (Pointer) &r, UpdateECCallback);
30467 
30468   r.ec_rep_data = FreeECReplacementTable(r.ec_rep_data, r.ec_rep_len);
30469   if (r.num_removed > 0 || r.num_replaced > 0) {
30470     return TRUE;
30471   } else {
30472     return FALSE;
30473   }
30474 }
30475 
30476 
/* State shared between PerformRetranslations and its feature callback. */
typedef struct retranslatecdscallback {
  Int4 num_retranslated;           /* incremented each time RetranslateOneCDS returns TRUE */
  RetranslateCdsActionPtr action;  /* supplies obey_stop_codon for each retranslation */
} RetranslateCDSCallbackData, PNTR RetranslateCDSCallbackPtr;
30481 
/* Feature visitor: retranslates each coding region with RetranslateOneCDS,
 * passing the negation of the action's obey_stop_codon flag followed by the
 * flag itself, and counts the calls that return TRUE in the
 * RetranslateCDSCallbackData passed as data.
 */
static void PerformRetranslationsCallback (SeqFeatPtr sfp, Pointer data)
{
  RetranslateCDSCallbackPtr ctx;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
    return;
  }
  ctx = (RetranslateCDSCallbackPtr) data;
  if (ctx == NULL || ctx->action == NULL) {
    return;
  }

  if (RetranslateOneCDS (sfp, sfp->idx.entityID, !ctx->action->obey_stop_codon, ctx->action->obey_stop_codon)) {
    ctx->num_retranslated++;
  }
}
30493 
30494 
PerformRetranslations(SeqEntryPtr sep,RetranslateCdsActionPtr action,FILE * log_fp)30495 static Boolean PerformRetranslations (SeqEntryPtr sep, RetranslateCdsActionPtr action, FILE *log_fp)
30496 {
30497   RetranslateCDSCallbackData r;
30498 
30499   MemSet (&r, 0, sizeof (RetranslateCDSCallbackData));
30500   r.action = action;
30501 
30502   VisitFeaturesInSep (sep, &r, PerformRetranslationsCallback);
30503   if (r.num_retranslated > 0) {
30504     if (log_fp != NULL) {
30505       fprintf (log_fp, "Retranslated %d coding regions\n", r.num_retranslated);
30506     }
30507     return TRUE;
30508   } else {
30509     return FALSE;
30510   }
30511 }
30512 
30513 
/* State shared between PerformAdjustFeaturesForGaps and its feature callback. */
typedef struct adjustfeaturesforgapscallback {
  Int4 num_processed;                     /* incremented for every feature handed to AdjustFeatureForGapsCallback */
  AdjustFeaturesForGapsActionPtr action;  /* action passed through to AdjustFeatureForGapsCallback */
} AdjustFeaturesForGapCallbackData, PNTR AdjustFeaturesForGapCallbackPtr;
30518 
/* Feature visitor: hands each feature and the stored action to
 * AdjustFeatureForGapsCallback and counts it in the
 * AdjustFeaturesForGapCallbackData passed as data.  Every feature handed
 * over is counted; the result of the adjustment is not inspected.
 */
static void PerformAdjustFeaturesForGapsCallback (SeqFeatPtr sfp, Pointer data)
{
  AdjustFeaturesForGapCallbackPtr ctx = (AdjustFeaturesForGapCallbackPtr) data;

  if (sfp == NULL || ctx == NULL || ctx->action == NULL) {
    return;
  }

  AdjustFeatureForGapsCallback (sfp, ctx->action);
  ctx->num_processed++;
}
30531 
30532 
PerformAdjustFeaturesForGaps(SeqEntryPtr sep,AdjustFeaturesForGapsActionPtr action,FILE * log_fp)30533 static Boolean PerformAdjustFeaturesForGaps (SeqEntryPtr sep, AdjustFeaturesForGapsActionPtr action, FILE *log_fp)
30534 {
30535   AdjustFeaturesForGapCallbackData r;
30536 
30537   MemSet (&r, 0, sizeof (AdjustFeaturesForGapCallbackData));
30538   r.action = action;
30539 
30540   VisitFeaturesInSep (sep, &r, PerformAdjustFeaturesForGapsCallback);
30541   if (r.num_processed > 0) {
30542     if (log_fp != NULL) {
30543       fprintf (log_fp, "Adjusted %d features for gaps\n", r.num_processed);
30544     }
30545     return TRUE;
30546   } else {
30547     return FALSE;
30548   }
30549 }
30550 
30551 
SummarizePerformAutofixAction(AutofixActionPtr action)30552 NLM_EXTERN CharPtr SummarizePerformAutofixAction (AutofixActionPtr action)
30553 {
30554   DiscrepancyType test_type;
30555   CharPtr         fmt = "Perform Autofix for %s Discrepancy Report Test";
30556   CharPtr         summ;
30557 
30558   if (action == NULL || (test_type = GetDiscrepancyTypeFromSettingName (action->test_name)) == MAX_DISC_TYPE) {
30559     return NULL;
30560   }
30561 
30562   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (action->test_name)));
30563   sprintf (summ, fmt, action->test_name);
30564   return summ;
30565 }
30566 
30567 
SummarizeFixSetsAction(FixSetsActionPtr action)30568 NLM_EXTERN CharPtr SummarizeFixSetsAction (FixSetsActionPtr action)
30569 {
30570   CharPtr summ = NULL;
30571 
30572   if (action == NULL) {
30573     return NULL;
30574   }
30575 
30576   switch (action->choice) {
30577     case FixSetsAction_remove_single_item_set:
30578       summ = StringSave ("Remove single-sequence pop, phy, mut, or eco wrapper set without alignment");
30579       break;
30580     case FixSetsAction_renormalize_nuc_prot_sets:
30581       summ = StringSave ("Renormalize nuc-prot sets");
30582       break;
30583     case FixSetsAction_fix_pop_to_phy:
30584       summ = StringSave ("Convert pop sets to phy sets when taxnames are inconsistent");
30585       break;
30586   }
30587 
30588   return summ;
30589 }
30590 
30591 
SummarizeUpdateSequencesAction(UpdateSequencesActionPtr action)30592 NLM_EXTERN CharPtr SummarizeUpdateSequencesAction (UpdateSequencesActionPtr action)
30593 {
30594   CharPtr summ, fmt = "Update sequences with FASTA from file %s";
30595   CharPtr add_cit_subs = ", add Cit-subs to sequences changed";
30596   Int4    len;
30597 
30598   if (action == NULL || StringHasNoText (action->filename)) {
30599     return NULL;
30600   }
30601 
30602   len = StringLen(fmt) + StringLen (action->filename);
30603   if (action->add_cit_subs) {
30604     len += StringLen (add_cit_subs);
30605   }
30606   summ = (CharPtr) MemNew (sizeof (Char) * len);
30607   sprintf (summ, fmt, action->filename);
30608   if (action->add_cit_subs) {
30609     StringCat (summ, add_cit_subs);
30610   }
30611   return summ;
30612 }
30613 
30614 
SummarizeCreateTSAIDsAction(CreateTSAIdsActionPtr action)30615 NLM_EXTERN CharPtr SummarizeCreateTSAIDsAction (CreateTSAIdsActionPtr action)
30616 {
30617   CharPtr summ = NULL;
30618   CharPtr suffix_fmt = ", use suffix %s";
30619   CharPtr local_fmt = "Create TSA IDs from local IDs";
30620   CharPtr defline_fmt = "Create TSA IDs from %s in defline";
30621   CharPtr text_portion;
30622   Int4    len;
30623 
30624   if (action == NULL || action->src == NULL) {
30625     return NULL;
30626   }
30627 
30628   switch (action->src->choice) {
30629     case CreateTSAIdsSrc_local_id:
30630       len = StringLen (local_fmt) + 1;
30631       if (!StringHasNoText (action->suffix)) {
30632         len += StringLen (suffix_fmt) + StringLen (action->suffix);
30633       }
30634       summ = (CharPtr) MemNew (sizeof (Char) * len);
30635       sprintf (summ, "%s", local_fmt);
30636       if (!StringHasNoText (action->suffix)) {
30637         sprintf (summ + StringLen (summ), suffix_fmt, action->suffix);
30638       }
30639       break;
30640     case CreateTSAIdsSrc_defline:
30641       text_portion = SummarizeTextPortion (action->src->data.ptrvalue);
30642       if (text_portion == NULL) {
30643         text_portion = StringSave ("entire text");
30644       }
30645       len = StringLen (defline_fmt) + StringLen (text_portion);
30646       if (!StringHasNoText (action->suffix)) {
30647         len += StringLen (suffix_fmt) + StringLen (action->suffix);
30648       }
30649       summ = (CharPtr) MemNew (sizeof (Char) * len);
30650       sprintf (summ, defline_fmt, text_portion);
30651       text_portion = MemFree (text_portion);
30652       if (!StringHasNoText (action->suffix)) {
30653         sprintf (summ + StringLen (summ), suffix_fmt, action->suffix);
30654       }
30655       break;
30656   }
30657 
30658 
30659   return summ;
30660 }
30661 
30662 
SummarizeApplyTableAction(ApplyTableActionPtr action)30663 NLM_EXTERN CharPtr SummarizeApplyTableAction (ApplyTableActionPtr action)
30664 {
30665   CharPtr summ, fmt = "Apply table from file %s";
30666   Int4    len;
30667 
30668   if (action == NULL || StringHasNoText (action->filename)) {
30669     return NULL;
30670   }
30671 
30672   len = StringLen(fmt) + StringLen (action->filename);
30673   summ = (CharPtr) MemNew (sizeof (Char) * len);
30674   sprintf (summ, fmt, action->filename);
30675   return summ;
30676 }
30677 
30678 
SummarizeAddDescriptorListAction(AddDescriptorListActionPtr action)30679 NLM_EXTERN CharPtr SummarizeAddDescriptorListAction (AddDescriptorListActionPtr action)
30680 {
30681   CharPtr summ, fmt = "Add descriptors from file %s to nucleotide sequences";
30682   CharPtr constraint;
30683   Int4    len;
30684 
30685   if (action == NULL || action->descriptor_list == NULL || StringHasNoText (action->descriptor_list->filename)) {
30686     return NULL;
30687   }
30688   constraint = SummarizeConstraintSet (action->constraint);
30689 
30690   len = StringLen(fmt) + StringLen (action->descriptor_list->filename) + StringLen (constraint) + 1;
30691   summ = (CharPtr) MemNew (sizeof (Char) * len);
30692   sprintf (summ, fmt, action->descriptor_list->filename);
30693   if (constraint != NULL) {
30694     StringCat (summ, " ");
30695     StringCat (summ, constraint);
30696     constraint = MemFree (constraint);
30697   }
30698   return summ;
30699 }
30700 
30701 
SummarizeRemoveSequencesAction(RemoveSequencesActionPtr action)30702 NLM_EXTERN CharPtr SummarizeRemoveSequencesAction (RemoveSequencesActionPtr action)
30703 {
30704   CharPtr summ = NULL, constraint, fmt = "Remove sequences %s";
30705 
30706   if (action == NULL || action->constraint == NULL) {
30707     return NULL;
30708   }
30709   constraint = SummarizeConstraintSet (action->constraint);
30710   if (constraint != NULL) {
30711     summ = (CharPtr) MemNew (sizeof (CharPtr) * (StringLen (fmt) + StringLen (constraint)));
30712     sprintf (summ, fmt, constraint);
30713     constraint = MemFree (constraint);
30714   }
30715   return summ;
30716 }
30717 
30718 
SummarizePropagateSequenceTechnology(Pointer action)30719 NLM_EXTERN CharPtr SummarizePropagateSequenceTechnology (Pointer action)
30720 {
30721   return StringSave ("Propagate Assembly-Data structured comments to sequences with same filename");
30722 }
30723 
30724 
30725 /* Functions for summarizing macro actions for display */
30726 
SummarizeApplyFeatureAction(ApplyFeatureActionPtr a)30727 static CharPtr SummarizeApplyFeatureAction (ApplyFeatureActionPtr a)
30728 {
30729   CharPtr    label = NULL;
30730   CharPtr    str;
30731   CharPtr    fmt = "Apply %s";
30732 
30733   if (a == NULL) {
30734     str = StringSave ("No action");
30735   } else {
30736     label = GetFeatureNameFromFeatureType (a->type);
30737     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label)));
30738     sprintf (str, fmt, label);
30739   }
30740   return str;
30741 }
30742 
30743 
SummarizeRemoveFeatureAction(RemoveFeatureActionPtr a)30744 static CharPtr SummarizeRemoveFeatureAction (RemoveFeatureActionPtr a)
30745 {
30746   CharPtr    label = NULL;
30747   CharPtr    constraint, str;
30748   CharPtr    fmt = "Remove %s";
30749   CharPtr    constraint_fmt = "Remove %s %s";
30750 
30751   if (a == NULL) {
30752     str = StringSave ("No action");
30753   } else {
30754     label = GetFeatureNameFromFeatureType (a->type);
30755     constraint = SummarizeConstraintSet (a->constraint);
30756     if (constraint == NULL) {
30757       str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label)));
30758       sprintf (str, fmt, label);
30759     } else {
30760       str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint)));
30761       sprintf (str, constraint_fmt, label, constraint);
30762       constraint = MemFree (constraint);
30763     }
30764   }
30765 
30766   return str;
30767 }
30768 
30769 
SummarizeConvertSourceOptions(ValNodePtr vnp)30770 static CharPtr SummarizeConvertSourceOptions (ValNodePtr vnp)
30771 {
30772   ConvertFromCDSOptionsPtr options;
30773   CharPtr fmt = "(%sremove overlapping mRNA, %sremove overlapping gene, %sremove transcript ID)";
30774   CharPtr str;
30775 
30776   if (vnp == NULL || vnp->choice != ConvertFeatureSrcOptions_cds || vnp->data.ptrvalue == NULL) {
30777     return NULL;
30778   }
30779 
30780   options = (ConvertFromCDSOptionsPtr) vnp->data.ptrvalue;
30781 
30782   str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 21));
30783   sprintf (str, fmt, options->remove_mRNA ? "" : "do not ",
30784                      options->remove_gene ? "" : "do not ",
30785                      options->remove_transcript_id ? "" : "do not ");
30786   return str;
30787 }
30788 
30789 
SummarizeConvertDestOptions(ValNodePtr vnp)30790 static CharPtr SummarizeConvertDestOptions (ValNodePtr vnp)
30791 {
30792   RegionTypePtr r;
30793   CharPtr str = NULL;
30794 
30795   if (vnp == NULL) return NULL;
30796 
30797   switch (vnp->choice) {
30798     case ConvertFeatureDstOptions_bond:
30799       str = StringSave (GetMacroBondTypeName(vnp->data.intvalue));
30800       break;
30801     case ConvertFeatureDstOptions_site:
30802       str = StringSave (GetMacroSiteTypeName(vnp->data.intvalue));
30803       break;
30804     case ConvertFeatureDstOptions_region:
30805       r = (RegionTypePtr) vnp->data.ptrvalue;
30806       if (r != NULL) {
30807         if (r->create_nucleotide) {
30808           str = StringSave ("on nucleotide sequence");
30809         } else {
30810           str = StringSave ("on protein sequence");
30811         }
30812       }
30813       break;
30814   }
30815   return str;
30816 }
30817 
30818 
SummarizeConvertFeatureAction(ConvertFeatureActionPtr a)30819 static CharPtr SummarizeConvertFeatureAction (ConvertFeatureActionPtr a)
30820 {
30821   CharPtr str = NULL, from_label, to_label, constraint, src_options, dst_options;
30822   CharPtr fmt = "Convert %s to %s";
30823   CharPtr keep_orig = ", keep original feature";
30824   CharPtr remove_orig = ", remove original feature";
30825   Int4    len;
30826 
30827   if (a == NULL) {
30828     str = StringSave ("No action");
30829   } else {
30830     from_label = GetFeatureNameFromFeatureType (a->type_from);
30831     to_label = GetFeatureNameFromFeatureType (a->type_to);
30832     src_options = SummarizeConvertSourceOptions (a->src_options);
30833     dst_options = SummarizeConvertDestOptions (a->dst_options);
30834     constraint = SummarizeConstraintSet (a->src_feat_constraint);
30835     len = StringLen (fmt) + StringLen (from_label) + StringLen (to_label);
30836     if (src_options != NULL) {
30837       len += StringLen (src_options) + 3;
30838     }
30839     if (dst_options != NULL) {
30840       len += StringLen (dst_options) + 1;
30841     }
30842     if (constraint != NULL) {
30843       len += StringLen (constraint) + 1;
30844     }
30845     if (a->leave_original) {
30846       len += StringLen (keep_orig);
30847     } else {
30848       len += StringLen (remove_orig);
30849     }
30850     str = (CharPtr) MemNew (sizeof (Char) * len);
30851     sprintf (str, fmt, from_label, to_label);
30852     if (dst_options != NULL) {
30853       StringCat (str, " ");
30854       StringCat (str, dst_options);
30855       dst_options = MemFree (dst_options);
30856     }
30857     if (src_options != NULL) {
30858       StringCat (str, ", ");
30859       StringCat (str, src_options);
30860       src_options = MemFree (src_options);
30861     }
30862     if (constraint != NULL) {
30863       StringCat (str, " ");
30864       StringCat (str, constraint);
30865       constraint = MemFree (constraint);
30866     }
30867     if (a->leave_original) {
30868       StringCat (str, keep_orig);
30869     } else {
30870       StringCat (str, remove_orig);
30871     }
30872   }
30873   return str;
30874 }
30875 
30876 
SummarizeEditLocationStrand(EditLocationStrandPtr strand)30877 static CharPtr SummarizeEditLocationStrand (EditLocationStrandPtr strand)
30878 {
30879   CharPtr from_label = NULL, to_label = NULL;
30880   CharPtr fmt = "Convert %s strand to %s";
30881   CharPtr str = NULL;
30882 
30883   if (strand == NULL) return NULL;
30884 
30885   switch (strand->strand_from) {
30886     case Feature_location_strand_from_any:
30887       from_label = "any";
30888       break;
30889     case Feature_location_strand_from_plus:
30890       from_label = "plus";
30891       break;
30892     case Feature_location_strand_from_minus:
30893       from_label = "minus";
30894       break;
30895     case Feature_location_strand_from_unknown:
30896       from_label = "unknown";
30897       break;
30898     case Feature_location_strand_from_both:
30899       from_label = "both";
30900       break;
30901   }
30902 
30903   switch (strand->strand_to) {
30904     case Feature_location_strand_to_plus:
30905       to_label = "plus";
30906       break;
30907     case Feature_location_strand_to_minus:
30908       to_label = "minus";
30909       break;
30910     case Feature_location_strand_to_unknown:
30911       to_label = "unknown";
30912       break;
30913     case Feature_location_strand_to_both:
30914       to_label = "both";
30915       break;
30916     case Feature_location_strand_to_reverse:
30917       to_label = "reverse";
30918       break;
30919   }
30920 
30921   if (from_label != NULL && to_label != NULL) {
30922     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (from_label) + StringLen (to_label)));
30923     sprintf (str, fmt, from_label, to_label);
30924   }
30925   return str;
30926 }
30927 
30928 
SummarizePartial5SetAction(Partial5SetActionPtr a)30929 static CharPtr SummarizePartial5SetAction (Partial5SetActionPtr a)
30930 {
30931   CharPtr str = NULL;
30932   CharPtr constraint = NULL, extend = NULL;
30933   CharPtr fmt = "Set 5' partial%s%s";
30934 
30935   if (a == NULL) return NULL;
30936 
30937   switch (a->constraint) {
30938     case Partial_5_set_constraint_all:
30939       constraint = "";
30940       break;
30941     case Partial_5_set_constraint_at_end:
30942       constraint = " when 5' end of location is at end of sequence";
30943       break;
30944     case Partial_5_set_constraint_bad_start:
30945       constraint = " when coding region has no start codon";
30946       break;
30947     case Partial_5_set_constraint_frame_not_one:
30948       constraint = " when coding region frame > 1";
30949       break;
30950   }
30951   if (a->extend) {
30952     extend = ", extend 5' end of feature to end of sequence";
30953   } else {
30954     extend = "";
30955   }
30956   if (constraint != NULL) {
30957     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
30958                                              + StringLen (constraint)
30959                                              + StringLen (extend)));
30960     sprintf (str, fmt, constraint, extend);
30961   }
30962   return str;
30963 }
30964 
30965 
SummarizePartial5ClearAction(Int4 a)30966 static CharPtr SummarizePartial5ClearAction (Int4 a)
30967 {
30968   CharPtr str = NULL;
30969 
30970   switch (a) {
30971     case Partial_5_clear_constraint_all:
30972       str = StringSave ("Clear 5' partial");
30973       break;
30974     case Partial_5_clear_constraint_not_at_end:
30975       str = StringSave ("Clear 5' partial when 5' end of feature is not at end of sequence");
30976       break;
30977     case Partial_5_clear_constraint_good_start:
30978       str = StringSave ("Clear 5' partial when coding region has start codon");
30979       break;
30980   }
30981   return str;
30982 }
30983 
30984 
SummarizePartial3SetAction(Partial3SetActionPtr a)30985 static CharPtr SummarizePartial3SetAction (Partial3SetActionPtr a)
30986 {
30987   CharPtr str = NULL;
30988   CharPtr constraint = NULL, extend = NULL;
30989   CharPtr fmt = "Set 3' partial%s%s";
30990 
30991   if (a == NULL) return NULL;
30992 
30993   switch (a->constraint) {
30994     case Partial_3_set_constraint_all:
30995       constraint = "";
30996       break;
30997     case Partial_3_set_constraint_at_end:
30998       constraint = " when 3' end of location is at end of sequence";
30999       break;
31000     case Partial_3_set_constraint_bad_end:
31001       constraint = " when coding region has no stop codon";
31002       break;
31003   }
31004   if (a->extend) {
31005     extend = ", extend 3' end of feature to end of sequence";
31006   } else {
31007     extend = "";
31008   }
31009   if (constraint != NULL) {
31010     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
31011                                              + StringLen (constraint)
31012                                              + StringLen (extend)));
31013     sprintf (str, fmt, constraint, extend);
31014   }
31015   return str;
31016 }
31017 
31018 
SummarizePartial3ClearAction(Int4 a)31019 static CharPtr SummarizePartial3ClearAction (Int4 a)
31020 {
31021   CharPtr str = NULL;
31022 
31023   switch (a) {
31024     case Partial_3_clear_constraint_all:
31025       str = StringSave ("Clear 3' partial");
31026       break;
31027     case Partial_3_clear_constraint_not_at_end:
31028       str = StringSave ("Clear 3' partial when 3' end of feature is not at end of sequence");
31029       break;
31030     case Partial_3_clear_constraint_good_end:
31031       str = StringSave ("Clear 3' partial when coding region has stop codon");
31032       break;
31033   }
31034   return str;
31035 }
31036 
31037 
SummarizePartialBothSetAction(PartialBothSetActionPtr a)31038 static CharPtr SummarizePartialBothSetAction (PartialBothSetActionPtr a)
31039 {
31040   CharPtr str = NULL;
31041   CharPtr constraint = NULL, extend = NULL;
31042   CharPtr fmt = "Set both ends partial%s%s";
31043 
31044   if (a == NULL) return NULL;
31045 
31046   switch (a->constraint) {
31047     case Partial_5_set_constraint_all:
31048       constraint = "";
31049       break;
31050     case Partial_5_set_constraint_at_end:
31051       constraint = " when both ends of location are at end of sequence";
31052       break;
31053   }
31054   if (a->extend) {
31055     extend = ", extend both ends of feature to end of sequence";
31056   } else {
31057     extend = "";
31058   }
31059   if (constraint != NULL) {
31060     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
31061                                              + StringLen (constraint)
31062                                              + StringLen (extend)));
31063     sprintf (str, fmt, constraint, extend);
31064   }
31065   return str;
31066 }
31067 
31068 
SummarizePartialBothClearAction(Int4 a)31069 static CharPtr SummarizePartialBothClearAction (Int4 a)
31070 {
31071   CharPtr str = NULL;
31072 
31073   switch (a) {
31074     case Partial_both_clear_constraint_all:
31075       str = StringSave ("Clear both ends partial");
31076       break;
31077     case Partial_3_clear_constraint_not_at_end:
31078       str = StringSave ("Clear both ends partial when both ends of feature are not at end of sequence");
31079       break;
31080   }
31081   return str;
31082 }
31083 
31084 
SummarizeConvertLoc(Int4 a)31085 static CharPtr SummarizeConvertLoc (Int4 a)
31086 {
31087   CharPtr str = NULL;
31088 
31089   switch (a) {
31090     case Convert_location_type_join:
31091       str = StringSave ("Convert location to join");
31092       break;
31093     case Convert_location_type_order:
31094       str = StringSave ("Convert location to order");
31095       break;
31096     case Convert_location_type_merge:
31097       str = StringSave ("Convert location to single interval");
31098       break;
31099   }
31100   return str;
31101 }
31102 
31103 
SummarizeFeatureDistance(ValNodePtr v,Int4 end)31104 static CharPtr SummarizeFeatureDistance (ValNodePtr v, Int4 end)
31105 {
31106   CharPtr fmt = " %s %d from %d' end of feature";
31107   CharPtr summ = NULL;
31108 
31109   if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) {
31110     return NULL;
31111   }
31112 
31113   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
31114                                             + StringLen (s_QuantityWords[v->choice - 1]) + 15));
31115   sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue, end);
31116   return summ;
31117 }
31118 
31119 
SummarizeExtendToFeature(ExtendToFeaturePtr efp,Boolean end5)31120 static CharPtr SummarizeExtendToFeature (ExtendToFeaturePtr efp, Boolean end5)
31121 {
31122   CharPtr fmt = "Extend %d' end of feature to nearest %s feature%s%s";
31123   CharPtr include_fmt = " (include %s location)";
31124   CharPtr feature, distance, include = NULL;
31125   Int4    len;
31126   CharPtr summ = NULL;
31127 
31128   if (efp == NULL) {
31129     return NULL;
31130   }
31131 
31132   feature = GetFeatureNameFromFeatureType (efp->type);
31133   distance = SummarizeFeatureDistance (efp->distance, end5 ? 5 : 3);
31134   len = StringLen (fmt) + StringLen (feature) + StringLen (distance);
31135   if (efp->include_feat) {
31136     include = (CharPtr) MemNew (sizeof (Char) * (StringLen (include_fmt) + StringLen (feature)));
31137     sprintf (include, include_fmt, feature);
31138     len += StringLen (include);
31139   }
31140 
31141   summ = (CharPtr) MemNew (sizeof (Char) * (len + 1));
31142   sprintf (summ, fmt, end5 ? 5 : 3, feature,
31143            distance == NULL ? "" : distance,
31144            include == NULL ? "" : include);
31145   distance = MemFree (distance);
31146   include = MemFree (include);
31147 
31148   return summ;
31149 }
31150 
31151 
SummarizeEditFeatureLocationAction(EditFeatureLocationActionPtr a)31152 static CharPtr SummarizeEditFeatureLocationAction (EditFeatureLocationActionPtr a)
31153 {
31154   CharPtr str = NULL, action_label = NULL, constraint, feature;
31155   CharPtr fmt = "%s for %s features";
31156   CharPtr constraint_fmt = "%s for %s features %s";
31157   CharPtr retranslate_cds = " and retranslated affected coding regions";
31158   CharPtr also_edit_gene = " and adjust overlapping gene";
31159   Int4    len;
31160 
31161   if (a == NULL || a->action == NULL) {
31162     str = StringSave ("No action");
31163   } else {
31164 
31165     switch (a->action->choice) {
31166       case LocationEditType_strand:
31167         action_label = SummarizeEditLocationStrand (a->action->data.ptrvalue);
31168         break;
31169       case LocationEditType_set_5_partial:
31170         action_label = SummarizePartial5SetAction (a->action->data.ptrvalue);
31171         break;
31172       case LocationEditType_clear_5_partial:
31173         action_label = SummarizePartial5ClearAction (a->action->data.intvalue);
31174         break;
31175       case LocationEditType_set_3_partial:
31176         action_label = SummarizePartial3SetAction (a->action->data.ptrvalue);
31177         break;
31178       case LocationEditType_clear_3_partial:
31179         action_label = SummarizePartial3ClearAction (a->action->data.intvalue);
31180         break;
31181       case LocationEditType_set_both_partial:
31182         action_label = SummarizePartialBothSetAction (a->action->data.ptrvalue);
31183         break;
31184       case LocationEditType_clear_both_partial:
31185         action_label = SummarizePartialBothClearAction (a->action->data.intvalue);
31186         break;
31187       case LocationEditType_convert:
31188         action_label = SummarizeConvertLoc (a->action->data.intvalue);
31189         break;
31190       case LocationEditType_extend_5:
31191         action_label = StringSave ("Extend 5' end of feature to end of sequence");
31192         break;
31193       case LocationEditType_extend_3:
31194         action_label = StringSave ("Extend 3' end of feature to end of sequence");
31195         break;
31196       case LocationEditType_extend_5_to_feat:
31197         action_label = SummarizeExtendToFeature (a->action->data.ptrvalue, TRUE);
31198         break;
31199       case LocationEditType_extend_3_to_feat:
31200         action_label = SummarizeExtendToFeature (a->action->data.ptrvalue, FALSE);
31201         break;
31202     }
31203     if (action_label == NULL) {
31204       str = StringSave ("Invalid action");
31205     } else {
31206       feature = GetFeatureNameFromFeatureType (a->type);
31207       constraint = SummarizeConstraintSet (a->constraint);
31208       len = 0;
31209       if (a->retranslate_cds) {
31210         len += StringLen (retranslate_cds);
31211       }
31212       if (a->also_edit_gene) {
31213         len += StringLen (also_edit_gene);
31214       }
31215       if (constraint == NULL) {
31216         len += StringLen (fmt) + StringLen (action_label) + StringLen (feature);
31217         str = (CharPtr) MemNew (sizeof (Char) * len);
31218         sprintf (str, fmt, action_label, feature);
31219       } else {
31220         len += StringLen (constraint_fmt) + StringLen (action_label) + StringLen (feature) + StringLen (constraint);
31221         str = (CharPtr) MemNew (sizeof (Char) * len);
31222         sprintf (str, constraint_fmt, action_label, feature, constraint);
31223         constraint = MemFree (constraint);
31224       }
31225       if (a->retranslate_cds) {
31226         StringCat (str, retranslate_cds);
31227       }
31228       if (a->also_edit_gene) {
31229         StringCat (str, also_edit_gene);
31230       }
31231     }
31232   }
31233   return str;
31234 }
31235 
31236 
31237 static CharPtr s_Suppression[] = { NULL, "suppressing", "non-suppressing" };
31238 static CharPtr s_Necessary[] = { NULL, "necessary", "unnecessary" };
31239 
SummarizeRemoveXref(RemoveXrefsActionPtr a)31240 static CharPtr SummarizeRemoveXref (RemoveXrefsActionPtr a)
31241 {
31242   CharPtr str = NULL, label, constraint;
31243   GeneXrefTypePtr g;
31244   CharPtr fmt = "Remove %s%s%s%sgene xrefs from %s features";
31245   CharPtr suppression, necessary;
31246   Int4 len;
31247 
31248   if (a == NULL || a->xref_type == NULL) {
31249     str = StringSave ("No action");
31250   } else if (a->xref_type->choice != XrefType_gene
31251     || (g = (GeneXrefTypePtr) a->xref_type->data.ptrvalue) == NULL) {
31252     str = StringSave ("Invalid action");
31253   } else {
31254     label = GetFeatureNameFromFeatureType (g->feature);
31255     if (g->suppression < sizeof (s_Suppression) / sizeof (CharPtr)) {
31256       suppression = s_Suppression[g->suppression];
31257     } else {
31258       suppression = NULL;
31259     }
31260     if (g->necessary < sizeof (s_Necessary) / sizeof (CharPtr)) {
31261       necessary = s_Necessary[g->necessary];
31262     } else {
31263       necessary = NULL;
31264     }
31265     constraint = SummarizeConstraintSet (a->constraint);
31266     len = StringLen (label) + StringLen (fmt) + StringLen (suppression) + StringLen (necessary) + StringLen (constraint);
31267     str = (CharPtr) MemNew (sizeof (Char) * len);
31268     sprintf (str, fmt,
31269              suppression == NULL ? "" : suppression, suppression == NULL ? "" : " ",
31270              necessary == NULL ? "" : necessary, necessary == NULL ? "" : " ",
31271              label);
31272     if (constraint != NULL) {
31273       StringCat (str, constraint);
31274       constraint = MemFree (constraint);
31275     }
31276   }
31277   return str;
31278 }
31279 
31280 
SummarizeMakeGeneXrefs(MakeGeneXrefActionPtr a)31281 static CharPtr SummarizeMakeGeneXrefs(MakeGeneXrefActionPtr a)
31282 {
31283   CharPtr constraint, str, label;
31284   CharPtr fmt = "Make gene xrefs from overlapping gene features for %s features%s";
31285 
31286   if (a == NULL) {
31287     str = StringSave ("No action");
31288   } else {
31289     label = GetFeatureNameFromFeatureType (a->feature);
31290     constraint = SummarizeConstraintSet (a->constraint);
31291     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (constraint) + StringLen (label)));
31292     sprintf (str, fmt, label, constraint == NULL ? "" : constraint);
31293     constraint = MemFree (constraint);
31294   }
31295   return str;
31296 }
31297 
31298 
SummarizeMacroAction(ValNodePtr vnp)31299 NLM_EXTERN CharPtr SummarizeMacroAction (ValNodePtr vnp)
31300 {
31301   CharPtr str = NULL;
31302 
31303   if (vnp == NULL) {
31304     return StringSave ("No action");
31305   }
31306   switch (vnp->choice) {
31307     case MacroActionChoice_aecr:
31308       str = SummarizeAECRAction (vnp->data.ptrvalue);
31309       break;
31310     case MacroActionChoice_parse:
31311       str = SummarizeParseAction (vnp->data.ptrvalue);
31312       break;
31313     case MacroActionChoice_add_feature:
31314       str = SummarizeApplyFeatureAction (vnp->data.ptrvalue);
31315       break;
31316     case MacroActionChoice_remove_feature:
31317       str = SummarizeRemoveFeatureAction (vnp->data.ptrvalue);
31318       break;
31319     case MacroActionChoice_edit_location:
31320       str = SummarizeEditFeatureLocationAction (vnp->data.ptrvalue);
31321       break;
31322     case MacroActionChoice_convert_feature:
31323       str = SummarizeConvertFeatureAction (vnp->data.ptrvalue);
31324       break;
31325     case MacroActionChoice_remove_descriptor:
31326       str = SummarizeRemoveDescriptorAction (vnp->data.ptrvalue);
31327       break;
31328     case MacroActionChoice_autodef:
31329       str = SummarizeAutodefAction (vnp->data.ptrvalue);
31330       break;
31331     case MacroActionChoice_removesets:
31332       str = StringSave ("Remove duplicate nested sets");
31333       break;
31334     case MacroActionChoice_trim_junk_from_primer_seq:
31335       str = StringSave ("Trim junk from primer seqs");
31336       break;
31337     case MacroActionChoice_fix_usa_and_states:
31338       str = StringSave ("Fix USA and state abbreviations in publications");
31339       break;
31340     case MacroActionChoice_trim_stop_from_complete_cds:
31341       str = StringSave ("Remove trailing * from complete coding regions");
31342       break;
31343     case MacroActionChoice_synchronize_cds_partials:
31344       str = StringSave ("Synchronize coding region partials");
31345       break;
31346     case MacroActionChoice_adjust_for_consensus_splice:
31347       str = StringSave ("Adjust coding regions for consensus splice sites");
31348       break;
31349     case MacroActionChoice_fix_pub_caps:
31350       str = SummarizeFixPubCapsAction(vnp->data.ptrvalue);
31351       break;
31352     case MacroActionChoice_remove_seg_gaps:
31353       str = StringSave ("Remove seg-gaps");
31354       break;
31355     case MacroActionChoice_sort_fields:
31356       str = SummarizeSortFieldsAction (vnp->data.ptrvalue);
31357       break;
31358     case MacroActionChoice_apply_molinfo_block:
31359       str = SummarizeMolinfoBlockAction (vnp->data.ptrvalue);
31360       break;
31361     case MacroActionChoice_fix_caps:
31362       str = SummarizeFixCapsAction (vnp->data.ptrvalue);
31363       break;
31364     case MacroActionChoice_fix_format:
31365       str = SummarizeFixFormatAction (vnp->data.ptrvalue);
31366       break;
31367     case MacroActionChoice_fix_spell:
31368       str = StringSave ("Fix spelling");
31369       break;
31370     case MacroActionChoice_remove_duplicate_features:
31371       str = SummarizeRemoveDuplicateFeaturesAction (vnp->data.ptrvalue);
31372       break;
31373     case MacroActionChoice_remove_lineage_notes:
31374       str = StringSave ("Remove lineage source notes");
31375       break;
31376     case MacroActionChoice_remove_xrefs:
31377       str = SummarizeRemoveXref(vnp->data.ptrvalue);
31378       break;
31379     case MacroActionChoice_make_gene_xrefs:
31380       str = SummarizeMakeGeneXrefs(vnp->data.ptrvalue);
31381       break;
31382     case MacroActionChoice_make_bold_xrefs:
31383       str = StringSave ("Make Barcode Xrefs");
31384       break;
31385     case MacroActionChoice_fix_author:
31386       str = SummarizeAuthorFixAction(vnp->data.ptrvalue);
31387       break;
31388     case MacroActionChoice_update_sequences:
31389       str = SummarizeUpdateSequencesAction (vnp->data.ptrvalue);
31390       break;
31391     case MacroActionChoice_add_trans_splicing:
31392       str = StringSave ("Set trans-splicing exception in genes");
31393       break;
31394     case MacroActionChoice_remove_invalid_ecnumbers:
31395       str = StringSave ("Remove invalid EC_numbers");
31396       break;
31397     case MacroActionChoice_create_tsa_ids:
31398       str = SummarizeCreateTSAIDsAction (vnp->data.ptrvalue);
31399       break;
31400     case MacroActionChoice_perform_autofix:
31401       str = SummarizePerformAutofixAction (vnp->data.ptrvalue);
31402       break;
31403     case MacroActionChoice_fix_sets:
31404       str = SummarizeFixSetsAction (vnp->data.ptrvalue);
31405       break;
31406     case MacroActionChoice_apply_table:
31407       str = SummarizeApplyTableAction (vnp->data.ptrvalue);
31408       break;
31409     case MacroActionChoice_remove_sequences:
31410       str = SummarizeRemoveSequencesAction (vnp->data.ptrvalue);
31411       break;
31412     case MacroActionChoice_propagate_sequence_technology:
31413       str = SummarizePropagateSequenceTechnology(vnp->data.ptrvalue);
31414       break;
31415     case MacroActionChoice_add_file_descriptors:
31416       str = SummarizeAddDescriptorListAction(vnp->data.ptrvalue);
31417       break;
31418     case MacroActionChoice_propagate_missing_old_name:
31419       str = StringSave ("Propagate missing old-name qualifier");
31420       break;
31421     case MacroActionChoice_autoapply_structured_comments:
31422       str = StringSave ("Autoapply structured comment prefixes");
31423       break;
31424     case MacroActionChoice_reorder_structured_comments:
31425       str = StringSave ("Reorder structured comment fields");
31426       break;
31427     case MacroActionChoice_remove_duplicate_structured_comments:
31428       str = StringSave ("Remove duplicate structured comments");
31429       break;
31430     case MacroActionChoice_lookup_taxonomy:
31431       str = StringSave ("Perform taxonomy lookup and correct genetic codes");
31432       break;
31433     case MacroActionChoice_lookup_pubs:
31434       str = StringSave ("Perform pubs lookup");
31435       break;
31436     case MacroActionChoice_trim_terminal_ns:
31437       str = StringSave ("Trim terminal Ns from nucleotide bioseqs");
31438       break;
31439     case MacroActionChoice_update_replaced_ecnumbers:
31440       str = StringSave ("Update Replaced EC_numbers");
31441       break;
31442     case MacroActionChoice_instantiate_protein_titles:
31443       str = StringSave ("Instantiate Protein Titles");
31444       break;
31445     case MacroActionChoice_retranslate_cds:
31446       str = StringSave ("Retranslate coding regions");
31447       break;
31448     case MacroActionChoice_add_selenocysteine_except:
31449       str = StringSave ("Replace selenocysteine stops");
31450       break;
31451     case MacroActionChoice_join_short_trnas:
31452       str = StringSave ("Join short tRNAs");
31453       break;
31454     case MacroActionChoice_adjust_features_for_gaps:
31455       str = StringSave ("Adjust features for gaps");
31456       break;
31457     default:
31458       str = StringSave ("Invalid action");
31459       break;
31460   }
31461   return str;
31462 }
31463 
31464 
31465 
31466 
ApplyMacroToSeqEntryExEx(SeqEntryPtr sep,ValNodePtr macro,FILE * log_fp,GlobalAlignFunc align_func,Int4Ptr pNumNoOp)31467 NLM_EXTERN Boolean ApplyMacroToSeqEntryExEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp, GlobalAlignFunc align_func, Int4Ptr pNumNoOp)
31468 {
31469   Int4 num_AECR = 0, num_parse = 0, num;
31470   Uint2 entityID;
31471   Boolean needs_update = FALSE;
31472   CharPtr summ;
31473   Boolean any_change = FALSE;
31474   Boolean created_protein_features = FALSE;
31475   ValNodePtr list;
31476   LogInfoData lid;
31477 
31478   entityID = SeqMgrGetEntityIDForSeqEntry(sep);
31479   if (pNumNoOp != NULL) {
31480     *pNumNoOp = 0;
31481   }
31482 
31483   while (macro != NULL) {
31484     needs_update = TRUE;
31485     switch (macro->choice) {
31486       case MacroActionChoice_aecr:
31487         num = ApplyAECRActionToSeqEntry ((AECRActionPtr) macro->data.ptrvalue, sep, &created_protein_features);
31488         num_AECR += num;
31489         if (num > 0) {
31490           if (log_fp != NULL) {
31491             summ = SummarizeAECRAction ((AECRActionPtr) macro->data.ptrvalue);
31492             fprintf (log_fp, "Changed %d fields during %s\n", num, summ);
31493             summ = MemFree (summ);
31494           }
31495           any_change = TRUE;
31496         } else if (pNumNoOp != NULL) {
31497           (*pNumNoOp)++;
31498         }
31499         if (created_protein_features) {
31500           if (log_fp != NULL) {
31501             fprintf (log_fp, "Created protein features\n");
31502           }
31503           any_change = TRUE;
31504         }
31505         break;
31506       case MacroActionChoice_parse:
31507         num = ApplyParseActionToSeqEntry ((ParseActionPtr) macro->data.ptrvalue, sep);
31508         num_parse += num;
31509         if (num > 0) {
31510           if (log_fp != NULL) {
31511             summ = SummarizeParseAction ((ParseActionPtr) macro->data.ptrvalue);
31512             fprintf (log_fp, "Changed %d fields during %s\n", num, summ);
31513             summ = MemFree (summ);
31514           }
31515           any_change = TRUE;
31516         } else if (pNumNoOp != NULL) {
31517           (*pNumNoOp)++;
31518         }
31519         break;
31520       case MacroActionChoice_add_feature:
31521         num = ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) macro->data.ptrvalue, sep);
31522         if (num > 0) {
31523           if (log_fp != NULL) {
31524             fprintf (log_fp, "Added %d features\n", num);
31525           }
31526           any_change = TRUE;
31527         } else if (pNumNoOp != NULL) {
31528           (*pNumNoOp)++;
31529         }
31530         SeqMgrIndexFeatures (entityID, NULL);
31531         break;
31532       case MacroActionChoice_remove_feature:
31533         num = ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) macro->data.ptrvalue, sep);
31534         if (num > 0) {
31535           if (log_fp != NULL) {
31536             fprintf (log_fp, "Removed %d features\n", num);
31537           }
31538           any_change = TRUE;
31539           ObjMgrSetDirtyFlag (entityID, TRUE);
31540           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31541           needs_update = FALSE;
31542         } else if (pNumNoOp != NULL) {
31543           (*pNumNoOp)++;
31544         }
31545         break;
31546       case MacroActionChoice_edit_location:
31547         num = ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) macro->data.ptrvalue, sep, log_fp);
31548         if (num > 0) {
31549           any_change = TRUE;
31550           ObjMgrSetDirtyFlag (entityID, TRUE);
31551           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31552           needs_update = FALSE;
31553         } else if (pNumNoOp != NULL) {
31554           (*pNumNoOp)++;
31555         }
31556         break;
31557       case MacroActionChoice_convert_feature:
31558         num = ApplyConvertFeatureActionToSeqEntry ((ConvertFeatureActionPtr) macro->data.ptrvalue, sep, log_fp);
31559         if (num > 0) {
31560           any_change = TRUE;
31561         } else if (pNumNoOp != NULL) {
31562           (*pNumNoOp)++;
31563         }
31564         ObjMgrSetDirtyFlag (entityID, TRUE);
31565         ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31566         needs_update = FALSE;
31567         break;
31568       case MacroActionChoice_remove_descriptor:
31569         num = ApplyRemoveDescriptorActionToSeqEntry ((RemoveDescriptorActionPtr) macro->data.ptrvalue, sep);
31570         if (num > 0) {
31571           if (log_fp != NULL) {
31572             summ = SummarizeRemoveDescriptorAction ((RemoveDescriptorActionPtr) macro->data.ptrvalue);
31573             fprintf (log_fp, "Removed %d descriptors during %s\n", num, summ);
31574             summ = MemFree (summ);
31575           }
31576           any_change = TRUE;
31577         } else if (pNumNoOp != NULL) {
31578           (*pNumNoOp)++;
31579         }
31580         break;
31581       case MacroActionChoice_autodef:
31582         ApplyAutodefActionToSeqEntry ((AutodefActionPtr) macro->data.ptrvalue, sep);
31583         if (log_fp != NULL) {
31584           summ = SummarizeAutodefAction ((AutodefActionPtr) macro->data.ptrvalue);
31585           if (summ != NULL) {
31586             fprintf (log_fp, "Performed %s\n", summ);
31587           }
31588           summ = MemFree (summ);
31589         }
31590         any_change = TRUE;
31591         break;
31592       case MacroActionChoice_removesets:
31593         if (RemoveDuplicateNestedSetsForEntityID (entityID)) {
31594           if (log_fp != NULL) {
31595             fprintf (log_fp, "Removed duplicate nested sets\n");
31596           }
31597           any_change = TRUE;
31598         } else if (pNumNoOp != NULL) {
31599           (*pNumNoOp)++;
31600         }
31601         break;
31602       case MacroActionChoice_trim_junk_from_primer_seq:
31603         if (TrimPrimerSeqJunkInSeqEntry (sep, log_fp)) {
31604           any_change = TRUE;
31605         } else if (pNumNoOp != NULL) {
31606           (*pNumNoOp)++;
31607         }
31608         break;
31609       case MacroActionChoice_fix_usa_and_states:
31610         if (FixUsaAndStateAbbreviations (entityID, log_fp)) {
31611           any_change = TRUE;
31612         } else if (pNumNoOp != NULL) {
31613           (*pNumNoOp)++;
31614         }
31615         break;
31616       case MacroActionChoice_trim_stop_from_complete_cds:
31617         if (TrimStopsFromCompleteCodingRegions(sep, log_fp)) {
31618           ObjMgrSetDirtyFlag (entityID, TRUE);
31619           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31620           needs_update = FALSE;
31621           any_change = TRUE;
31622         } else if (pNumNoOp != NULL) {
31623           (*pNumNoOp)++;
31624         }
31625         break;
31626       case MacroActionChoice_synchronize_cds_partials:
31627         if (ResynchCodingRegionPartialsEx(sep, log_fp)) {
31628           ObjMgrSetDirtyFlag (entityID, TRUE);
31629           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31630           needs_update = FALSE;
31631           any_change = TRUE;
31632         } else if (pNumNoOp != NULL) {
31633           (*pNumNoOp)++;
31634         }
31635         break;
31636       case MacroActionChoice_adjust_for_consensus_splice:
31637         if (AdjustSeqEntryForConsensusSpliceEx(sep, log_fp, TRUE)) {
31638           ObjMgrSetDirtyFlag (entityID, TRUE);
31639           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31640           needs_update = FALSE;
31641           any_change = TRUE;
31642         } else if (pNumNoOp != NULL) {
31643           (*pNumNoOp)++;
31644         }
31645         break;
31646       case MacroActionChoice_fix_pub_caps:
31647         if (ApplyFixPubCapsToSeqEntry (macro->data.ptrvalue, sep, log_fp)) {
31648           any_change = TRUE;
31649         } else if (pNumNoOp != NULL) {
31650           (*pNumNoOp)++;
31651         }
31652         break;
31653       case MacroActionChoice_remove_seg_gaps:
31654         num = RemoveSegGapsInSeqEntry (sep);
31655         if (num > 0) {
31656           if (log_fp != NULL) {
31657             fprintf (log_fp, "Removed gaps in %d alignments\n", num);
31658           }
31659           any_change = TRUE;
31660         } else if (pNumNoOp != NULL) {
31661           (*pNumNoOp)++;
31662         }
31663         break;
31664       case MacroActionChoice_sort_fields:
31665         num = SortFieldsInSeqEntry (macro->data.ptrvalue, sep);
31666         if (num > 0) {
31667           if (log_fp != NULL) {
31668             summ = SummarizeSortFieldsAction (macro->data.ptrvalue);
31669             fprintf (log_fp, "Changed order of fields for %d objects during %s\n", num, summ);
31670             summ = MemFree (summ);
31671           }
31672           any_change = TRUE;
31673         } else if (pNumNoOp != NULL) {
31674           (*pNumNoOp)++;
31675         }
31676         break;
31677       case MacroActionChoice_apply_molinfo_block:
31678         if (ApplyMolinfoBlockToSeqEntryEx (sep, macro->data.ptrvalue, log_fp)) {
31679           any_change = TRUE;
31680         } else if (pNumNoOp != NULL) {
31681           (*pNumNoOp)++;
31682         }
31683         break;
31684       case MacroActionChoice_fix_caps:
31685         if (ApplyFixCapsToSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31686           any_change = TRUE;
31687         } else if (pNumNoOp != NULL) {
31688           (*pNumNoOp)++;
31689         }
31690         break;
31691       case MacroActionChoice_fix_format:
31692         if (ApplyFixFormatToSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31693           any_change = TRUE;
31694         } else if (pNumNoOp != NULL) {
31695           (*pNumNoOp)++;
31696         }
31697         break;
31698       case MacroActionChoice_fix_spell:
31699         if (SpellFixSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31700           any_change = TRUE;
31701         } else if (pNumNoOp != NULL) {
31702           (*pNumNoOp)++;
31703         }
31704         break;
31705       case MacroActionChoice_remove_duplicate_features:
31706         if (RemoveDuplicateFeaturesInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31707           any_change = TRUE;
31708           ObjMgrSetDirtyFlag (entityID, TRUE);
31709           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31710           needs_update = FALSE;
31711         } else if (pNumNoOp != NULL) {
31712           (*pNumNoOp)++;
31713         }
31714         break;
31715       case MacroActionChoice_remove_lineage_notes:
31716         if (RemoveLineageNotesInSeqEntry (sep, log_fp)) {
31717           any_change = TRUE;
31718         } else if (pNumNoOp != NULL) {
31719           (*pNumNoOp)++;
31720         }
31721         break;
31722       case MacroActionChoice_remove_xrefs:
31723         if (MacroRemoveXrefs (sep, macro->data.ptrvalue, log_fp)) {
31724           any_change = TRUE;
31725         } else if (pNumNoOp != NULL) {
31726           (*pNumNoOp)++;
31727         }
31728         break;
31729       case MacroActionChoice_make_gene_xrefs:
31730         if (MacroMakeGeneXrefs (sep, macro->data.ptrvalue, log_fp)) {
31731           any_change = TRUE;
31732         } else if (pNumNoOp != NULL) {
31733           (*pNumNoOp)++;
31734         }
31735         break;
31736       case MacroActionChoice_make_bold_xrefs:
31737         if (MacroMakeBoldXrefs (sep, log_fp)) {
31738           any_change = TRUE;
31739         } else if (pNumNoOp != NULL) {
31740           (*pNumNoOp)++;
31741         }
31742         break;
31743       case MacroActionChoice_fix_author:
31744         if (ApplyAuthorFixToSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31745           any_change = TRUE;
31746         } else if (pNumNoOp != NULL) {
31747           (*pNumNoOp)++;
31748         }
31749         break;
31750       case MacroActionChoice_update_sequences:
31751         if (UpdateSequencesInSeqEntry (sep, macro->data.ptrvalue, log_fp, align_func)) {
31752           any_change = TRUE;
31753         } else if (pNumNoOp != NULL) {
31754           (*pNumNoOp)++;
31755         }
31756         break;
31757       case MacroActionChoice_add_trans_splicing:
31758         if (AddTransSplicingInSeqEntry (sep, log_fp)) {
31759           any_change = TRUE;
31760         } else if (pNumNoOp != NULL) {
31761           (*pNumNoOp)++;
31762         }
31763         break;
31764       case MacroActionChoice_remove_invalid_ecnumbers:
31765         if (RemoveInvalidECnumbersInSeqEntry (sep, log_fp)) {
31766           any_change = TRUE;
31767         } else if (pNumNoOp != NULL) {
31768           (*pNumNoOp)++;
31769         }
31770         break;
31771       case MacroActionChoice_create_tsa_ids:
31772         if (CreateTsaIDsInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31773           any_change = TRUE;
31774         } else if (pNumNoOp != NULL) {
31775           (*pNumNoOp)++;
31776         }
31777         break;
31778       case MacroActionChoice_perform_autofix:
31779         if (PerformAutofixInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31780           any_change = TRUE;
31781         } else if (pNumNoOp != NULL) {
31782           (*pNumNoOp)++;
31783         }
31784         break;
31785       case MacroActionChoice_fix_sets:
31786         if (PerformFixSetsInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31787           any_change = TRUE;
31788         } else if (pNumNoOp != NULL) {
31789           (*pNumNoOp)++;
31790         }
31791         break;
31792       case MacroActionChoice_apply_table:
31793         if (PerformApplyTableInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31794           any_change = TRUE;
31795         } else if (pNumNoOp != NULL) {
31796           (*pNumNoOp)++;
31797         }
31798         break;
31799       case MacroActionChoice_remove_sequences:
31800         if (PerformRemoveSequencesInSeqEntry (sep, macro->data.ptrvalue, log_fp)) {
31801           ObjMgrSetDirtyFlag (entityID, TRUE);
31802           ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31803           needs_update = FALSE;
31804           any_change = TRUE;
31805         } else if (pNumNoOp != NULL) {
31806           (*pNumNoOp)++;
31807         }
31808         break;
31809       case MacroActionChoice_propagate_sequence_technology:
31810         if (PerformPropagateSequenceTechnology(sep, macro->data.ptrvalue, log_fp)) {
31811           any_change = TRUE;
31812         } else if (pNumNoOp != NULL) {
31813           (*pNumNoOp)++;
31814         }
31815         break;
31816       case MacroActionChoice_add_file_descriptors:
31817         if (AddFileDescriptors (sep, macro->data.ptrvalue, log_fp)) {
31818           any_change = TRUE;
31819         } else if (pNumNoOp != NULL) {
31820           (*pNumNoOp)++;
31821         }
31822         break;
31823       case MacroActionChoice_propagate_missing_old_name:
31824         list = ValNodeNew (NULL);
31825         list->data.ptrvalue = sep;
31826         if (PropagateMissingOldNames (list)) {
31827           any_change = TRUE;
31828           if (log_fp != NULL) {
31829             fprintf (log_fp, "Propagated missing old name qualifiers\n");
31830           }
31831         } else if (pNumNoOp != NULL) {
31832           (*pNumNoOp)++;
31833         }
31834         break;
31835       case MacroActionChoice_autoapply_structured_comments:
31836         if (AutoApplyStructuredCommentPrefixes (sep, log_fp)) {
31837           any_change = TRUE;
31838         } else if (pNumNoOp != NULL) {
31839           (*pNumNoOp)++;
31840         }
31841         break;
31842       case MacroActionChoice_reorder_structured_comments:
31843         if (ReorderStructuredCommentsInSeqEntry (sep)) {
31844           if (log_fp != NULL) {
31845             fprintf (log_fp, "Reordered structured comment fields\n");
31846           }
31847           any_change = TRUE;
31848         } else if (pNumNoOp != NULL) {
31849           (*pNumNoOp)++;
31850         }
31851         break;
31852       case MacroActionChoice_remove_duplicate_structured_comments:
31853         if (RemoveDuplicateStructuredCommentsInSeqEntry(sep)) {
31854           if (log_fp != NULL) {
31855             fprintf (log_fp, "Removed duplicate structured comments\n");
31856           }
31857           any_change = TRUE;
31858         } else if (pNumNoOp != NULL) {
31859           (*pNumNoOp)++;
31860         }
31861         break;
31862       case MacroActionChoice_lookup_taxonomy:
31863         Taxon3ReplaceOrgInSeqEntry(sep, FALSE);
31864         CorrectGenCodes (sep, entityID);
31865         if (log_fp != NULL) {
31866           fprintf (log_fp, "Performed TaxLookup and corrected genetic codes\n");
31867         }
31868         any_change = TRUE;
31869         break;
31870       case MacroActionChoice_lookup_pubs:
31871         MemSet (&lid, 0, sizeof (LogInfoData));
31872         lid.fp = log_fp;
31873         num = LookupPubsInSeqEntry (sep, log_fp == NULL ? NULL : &lid);
31874         if (num > 0) {
31875           any_change = TRUE;
31876           if (log_fp != NULL) {
31877             fprintf (log_fp, "Replaced %d pubs during Pub Lookup\n", num);
31878           }
31879         } else if (pNumNoOp != NULL) {
31880           (*pNumNoOp)++;
31881         }
31882         break;
31883       case MacroActionChoice_trim_terminal_ns:
31884         MemSet (&lid, 0, sizeof (LogInfoData));
31885         lid.fp = log_fp;
31886         num = TrimNsFromNucsInSeqEntry (sep, log_fp == NULL ? NULL : &lid);
31887         if (num > 0) {
31888           any_change = TRUE;
31889           if (log_fp != NULL) {
31890             fprintf (log_fp, "Trimmed terminal Ns from %d sequences\n", num);
31891           }
31892         } else if (pNumNoOp != NULL) {
31893           (*pNumNoOp)++;
31894         }
31895         break;
31896       case MacroActionChoice_update_replaced_ecnumbers:
31897         if (ReplaceUpdatedECNumbers(sep, macro->data.ptrvalue, log_fp)) {
31898           any_change = TRUE;
31899         } else if (pNumNoOp != NULL) {
31900           (*pNumNoOp)++;
31901         }
31902         break;
31903       case MacroActionChoice_instantiate_protein_titles:
31904         InstantiateProteinTitles (entityID, NULL);
31905         any_change = TRUE;
31906         if (log_fp != NULL) {
31907           fprintf (log_fp, "Instantiated protein titles\n", num);
31908         }
31909         break;
31910       case MacroActionChoice_retranslate_cds:
31911         if (PerformRetranslations (sep, macro->data.ptrvalue, log_fp)) {
31912           any_change = TRUE;
31913         } else if (pNumNoOp != NULL) {
31914           (*pNumNoOp)++;
31915         }
31916         break;
31917       case MacroActionChoice_add_selenocysteine_except:
31918         if (ReplaceStopsWithSelenocysteineInSeqEntry(sep, log_fp)) {
31919           any_change = TRUE;
31920         } else if (pNumNoOp != NULL) {
31921           (*pNumNoOp)++;
31922         }
31923         break;
31924       case MacroActionChoice_join_short_trnas:
31925         if (JoinShortTrnas(sep, log_fp)) {
31926           any_change = TRUE;
31927         } else if (pNumNoOp != NULL) {
31928           (*pNumNoOp)++;
31929         }
31930         break;
31931       case MacroActionChoice_adjust_features_for_gaps:
31932         if (PerformAdjustFeaturesForGaps (sep, macro->data.ptrvalue, log_fp)) {
31933           any_change = TRUE;
31934         } else if (pNumNoOp != NULL) {
31935           (*pNumNoOp)++;
31936         }
31937         break;
31938     }
31939     macro = macro->next;
31940   }
31941 
31942   if (needs_update) {
31943     ObjMgrSetDirtyFlag (entityID, TRUE);
31944     ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
31945   }
31946   return any_change;
31947 }
31948 
31949 
ApplyMacroToSeqEntryEx(SeqEntryPtr sep,ValNodePtr macro,FILE * log_fp,GlobalAlignFunc align_func)31950 NLM_EXTERN Boolean ApplyMacroToSeqEntryEx (SeqEntryPtr sep, ValNodePtr macro, FILE *log_fp, GlobalAlignFunc align_func)
31951 {
31952   return ApplyMacroToSeqEntryExEx (sep, macro, log_fp, align_func, NULL);
31953 }
31954 
31955 
ApplyMacroToSeqEntry(SeqEntryPtr sep,ValNodePtr macro)31956 NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro)
31957 {
31958   ApplyMacroToSeqEntryEx (sep, macro, NULL, NULL);
31959 }
31960 
31961 
PreprocessApplyTableMacro(ApplyTableActionPtr apply_table,FILE * log_fp)31962 static Boolean PreprocessApplyTableMacro (ApplyTableActionPtr apply_table, FILE *log_fp)
31963 {
31964   Boolean rval = TRUE;
31965   FILE *fp;
31966 
31967   if (apply_table->in_memory_table == NULL) {
31968     apply_table->in_memory_table = ValNodeNew (NULL);
31969     apply_table->in_memory_table->choice = ApplyTableExtraData_table;
31970     if (StringHasNoText (apply_table->filename)) {
31971       rval = FALSE;
31972       if (log_fp != NULL) {
31973         fprintf (log_fp, "No filename supplied for apply table action\n");
31974       }
31975     } else {
31976       fp = FileOpen (apply_table->filename, "r");
31977       if (fp == NULL) {
31978         if (log_fp != NULL) {
31979           fprintf (log_fp, "Unable to open %s\n", apply_table->filename);
31980         }
31981         rval = FALSE;
31982       } else {
31983         apply_table->in_memory_table->data.ptrvalue = ReadTabTableFromFile (fp);
31984         FileClose (fp);
31985         if (apply_table->in_memory_table->data.ptrvalue == NULL) {
31986           if (log_fp != NULL) {
31987             fprintf (log_fp, "Unable to read table from %s\n", apply_table->filename);
31988           }
31989           rval = FALSE;
31990         }
31991       }
31992     }
31993   }
31994   return rval;
31995 }
31996 
31997 
PreprocessAddDescriptorListMacro(AddDescriptorListActionPtr action,FILE * log_fp)31998 static Boolean PreprocessAddDescriptorListMacro (AddDescriptorListActionPtr action, FILE *log_fp)
31999 {
32000   Boolean  rval = TRUE;
32001   AsnIoPtr aip;
32002   SeqDescPtr sdp;
32003   SeqDescrPtr sdp_list = NULL;
32004 
32005   if (action->descriptor_list->in_memory_table == NULL) {
32006     action->descriptor_list->in_memory_table = ValNodeNew (NULL);
32007     action->descriptor_list->in_memory_table->choice = ApplyTableExtraData_table;
32008     if (StringHasNoText (action->descriptor_list->filename)) {
32009       return FALSE;
32010     }
32011     aip = AsnIoOpen (action->descriptor_list->filename, "r");
32012     if (aip == NULL)
32013     {
32014       if (log_fp != NULL) {
32015         fprintf (log_fp, "Unable to open %s\n", action->descriptor_list->filename);
32016       }
32017       return FALSE;
32018     }
32019 
32020     while (sdp = SeqDescAsnRead (aip, NULL)) {
32021       ValNodeLink (&sdp_list, sdp);
32022     }
32023     AsnIoClose (aip);
32024 
32025     if (sdp_list == NULL) {
32026       if (log_fp != NULL) {
32027         fprintf (log_fp, "Unable to read table from %s\n", action->descriptor_list->filename);
32028       }
32029       return FALSE;
32030     }
32031     action->descriptor_list->in_memory_table->data.ptrvalue = sdp_list;
32032   }
32033   return rval;
32034 }
32035 
32036 
PreprocessMacroForRepeatedUse(ValNodePtr macro,FILE * log_fp)32037 NLM_EXTERN Boolean PreprocessMacroForRepeatedUse (ValNodePtr macro, FILE *log_fp)
32038 {
32039   ValNodePtr vnp;
32040   Boolean rval = TRUE;
32041 
32042   for (vnp = macro; vnp != NULL; vnp = vnp->next) {
32043     if (vnp->choice == MacroActionChoice_apply_table) {
32044       rval &= PreprocessApplyTableMacro(vnp->data.ptrvalue, log_fp);
32045     } else if (vnp->choice == MacroActionChoice_add_file_descriptors) {
32046       rval &= PreprocessAddDescriptorListMacro (vnp->data.ptrvalue, log_fp);
32047     }
32048   }
32049   return rval;
32050 }
32051 
32052 
CleanupMacroAfterRepeatedUse(ValNodePtr macro)32053 NLM_EXTERN void CleanupMacroAfterRepeatedUse (ValNodePtr macro)
32054 {
32055   ValNodePtr vnp;
32056   ApplyTableActionPtr apply_table;
32057   AddDescriptorListActionPtr desc_list;
32058   SeqDescPtr sdp, sdp_next;
32059 
32060   for (vnp = macro; vnp != NULL; vnp = vnp->next) {
32061     if (vnp->choice == MacroActionChoice_apply_table) {
32062       if ((apply_table = (ApplyTableActionPtr) vnp->data.ptrvalue) != NULL
32063           && apply_table->in_memory_table != NULL) {
32064         apply_table->in_memory_table->data.ptrvalue = FreeTabTable (apply_table->in_memory_table->data.ptrvalue);
32065         apply_table->in_memory_table = ValNodeFree (apply_table->in_memory_table);
32066       }
32067     } else if (vnp->choice == MacroActionChoice_add_file_descriptors) {
32068       if ((desc_list = (AddDescriptorListActionPtr) vnp->data.ptrvalue) != NULL
32069           && desc_list->descriptor_list != NULL
32070           && desc_list->descriptor_list->in_memory_table != NULL) {
32071         for (sdp = desc_list->descriptor_list->in_memory_table->data.ptrvalue;
32072              sdp != NULL;
32073              sdp = sdp_next) {
32074           sdp_next = sdp->next;
32075           sdp->next = NULL;
32076           sdp = SeqDescFree (sdp);
32077         }
32078         desc_list->descriptor_list->in_memory_table->data.ptrvalue = NULL;
32079       }
32080     }
32081   }
32082 }
32083 
32084 
32085 /* for generating text descriptions of macro objects */
SummarizeSourceQual(ValNodePtr field)32086 NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field)
32087 {
32088   CharPtr summ = NULL, locname, origname;
32089   Int4    genome, origin;
32090   CharPtr loc_fmt = "location %s";
32091   CharPtr orig_fmt = "origin %s";
32092 
32093   if (field == NULL) return NULL;
32094   switch (field->choice) {
32095     case SourceQualChoice_textqual:
32096       summ = StringSave (GetSourceQualName (field->data.intvalue));
32097       break;
32098     case SourceQualChoice_location:
32099       genome = GenomeFromSrcLoc (field->data.intvalue);
32100       locname = LocNameFromGenome (genome);
32101       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (loc_fmt) + StringLen (locname)));
32102       sprintf (summ, loc_fmt, locname);
32103       break;
32104     case SourceQualChoice_origin:
32105       origin = OriginFromSrcOrig (field->data.intvalue);
32106       origname = OriginNameFromOrigin (origin);
32107       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (orig_fmt) + StringLen (origname)));
32108       sprintf (summ, orig_fmt, origname);
32109       break;
32110   }
32111   return summ;
32112 }
32113 
32114 
FeatureFieldLabel(CharPtr feature_name,ValNodePtr field)32115 NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field)
32116 {
32117   CharPtr cp;
32118   CharPtr label = NULL;
32119   CharPtr legal_fmt = "%s %s";
32120   CharPtr illegal_fmt = "constrained field on %s";
32121 
32122   if (feature_name == NULL) {
32123     feature_name = "Unknown feature";
32124   }
32125 
32126   if (field == NULL) {
32127     return StringSave ("missing field");
32128   } else if (field->choice == FeatQualChoice_legal_qual) {
32129     cp = GetFeatQualName (field->data.intvalue);
32130     if (cp == NULL) cp = "Unknown field type";
32131     label = (CharPtr) MemNew (sizeof (Char) * (StringLen (legal_fmt) + StringLen (feature_name) + StringLen (cp)));
32132     sprintf (label, legal_fmt, feature_name, cp);
32133   } else if (field->choice == FeatQualChoice_illegal_qual) {
32134     label = (CharPtr) MemNew (sizeof (Char) * (StringLen (illegal_fmt) + StringLen (feature_name)));
32135     sprintf (label, illegal_fmt, feature_name);
32136   } else {
32137     label = StringSave ("illegal field value");
32138   }
32139   return label;
32140 }
32141 
32142 
SummarizeFieldType(ValNodePtr vnp)32143 NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp)
32144 {
32145   FeatureFieldPtr ffp;
32146   CharPtr str = NULL;
32147   CharPtr    label = NULL;
32148   CharPtr pub_fmt = "publication %s";
32149 
32150   if (vnp == NULL) {
32151     str = StringSave ("missing field");
32152   } else {
32153     switch (vnp->choice) {
32154       case FieldType_source_qual:
32155         str = SummarizeSourceQual (vnp->data.ptrvalue);
32156         break;
32157       case FieldType_feature_field:
32158         ffp = (FeatureFieldPtr) vnp->data.ptrvalue;
32159         if (ffp == NULL || ffp->field == NULL) {
32160           str = StringSave ("missing field");
32161         } else {
32162           label = GetFeatureNameFromFeatureType (ffp->type);
32163           str = FeatureFieldLabel (label, ffp->field);
32164         }
32165         break;
32166       case FieldType_cds_gene_prot:
32167         str = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue));
32168         if (str == NULL) {
32169           str = StringSave ("Invalid CDS-Gene-Prot Field");
32170         }
32171         break;
32172       case FieldType_molinfo_field:
32173         str = GetSequenceQualName (vnp->data.ptrvalue);
32174         if (str == NULL) {
32175           str = StringSave ("Invalid Sequence Qual Field");
32176         }
32177         break;
32178       case FieldType_pub:
32179         switch (vnp->data.intvalue) {
32180           case Publication_field_cit:
32181             str = StringSave ("publication citation");
32182             break;
32183           case Publication_field_authors:
32184             str = StringSave ("publication authors");
32185             break;
32186           case Publication_field_journal:
32187             str = StringSave ("publication journal");
32188             break;
32189           case Publication_field_volume:
32190             str = StringSave ("publication volume");
32191             break;
32192           case Publication_field_issue:
32193             str = StringSave ("publication issue");
32194             break;
32195           case Publication_field_pages:
32196             str = StringSave ("publication pages");
32197             break;
32198           case Publication_field_date:
32199             str = StringSave ("publication date");
32200             break;
32201           case Publication_field_serial_number:
32202             str = StringSave ("publication serial number");
32203             break;
32204           case Publication_field_title:
32205             str = StringSave ("publication title");
32206             break;
32207           case Publication_field_pmid:
32208             str = StringSave ("PMID");
32209             break;
32210           default:
32211             label = GetPubFieldLabel (vnp->data.intvalue);
32212             if (label == NULL) {
32213               str = StringSave ("Invalid field type");
32214             } else {
32215               str = MemNew (sizeof (Char) * (StringLen (pub_fmt) + StringLen (label)));
32216               sprintf (str, pub_fmt, label);
32217             }
32218             break;
32219         }
32220         break;
32221       case FieldType_rna_field:
32222         str = SummarizeRnaQual (vnp->data.ptrvalue);
32223         break;
32224       case FieldType_struc_comment_field:
32225         str = SummarizeStructuredCommentField (vnp->data.ptrvalue);
32226         break;
32227       case FieldType_dblink:
32228         str = StringSave (GetDBLinkNameFromDBLinkFieldType (vnp->data.intvalue));
32229         break;
32230       case FieldType_misc:
32231         if (vnp->data.intvalue == Misc_field_genome_project_id) {
32232           str = StringSave ("Genome Project ID");
32233         } else if (vnp->data.intvalue == Misc_field_comment_descriptor) {
32234           str = StringSave ("Comment Descriptor");
32235         } else if (vnp->data.intvalue == Misc_field_defline) {
32236           str = StringSave ("Definition Line");
32237         } else if (vnp->data.intvalue == Misc_field_keyword) {
32238           str = StringSave ("Keyword");
32239         } else {
32240           str = StringSave ("Invalid field type");
32241         }
32242         break;
32243       default:
32244         str = StringSave ("Invalid field type");
32245         break;
32246     }
32247   }
32248   return str;
32249 }
32250 
32251 
FieldTypeFromString(CharPtr str)32252 NLM_EXTERN FieldTypePtr FieldTypeFromString (CharPtr str)
32253 {
32254   Int4 qual_type, feat_type = -1;
32255   FieldTypePtr ft = NULL;
32256   FeatureFieldPtr ffp;
32257   ValNodePtr   vnp, molfield;
32258   CharPtr      cpy, cp;
32259   RnaQualPtr   rq;
32260 
32261   if (StringHasNoText (str)) {
32262     return NULL;
32263   }
32264 
32265   /* check source quals first */
32266   qual_type = GetSourceQualTypeByName (str);
32267   if (qual_type > -1) {
32268     vnp = ValNodeNew (NULL);
32269     vnp->choice = SourceQualChoice_textqual;
32270     vnp->data.intvalue = qual_type;
32271     ft = ValNodeNew (NULL);
32272     ft->choice = FieldType_source_qual;
32273     ft->data.ptrvalue = vnp;
32274   } else {
32275     /* try feature fields */
32276     cpy = StringSave (str);
32277     cp = StringChr (cpy, ' ');
32278     while (cp != NULL && feat_type == -1) {
32279       *cp = 0;
32280       feat_type = GetFeatureTypeByName (cpy);
32281       if (feat_type < 0) {
32282         *cp = ' ';
32283         cp = StringChr (cp + 1, ' ');
32284       }
32285     }
32286     if (feat_type > -1) {
32287       qual_type = GetFeatQualByName (cp + 1);
32288       if (qual_type > -1) {
32289         ffp = FeatureFieldNew ();
32290         ffp->type = feat_type;
32291         ValNodeAddInt (&ffp->field, FeatQualChoice_legal_qual, qual_type);
32292         ft = ValNodeNew (NULL);
32293         ft->choice = FieldType_feature_field;
32294         ft->data.ptrvalue = ffp;
32295       }
32296     }
32297     cpy = MemFree (cpy);
32298     if (ft == NULL) {
32299       /* try CDS-gene-prot */
32300       qual_type = CDSGeneProtFieldFromName (str);
32301       if (qual_type > -1) {
32302         ft = ValNodeNew (NULL);
32303         ft->choice = FieldType_cds_gene_prot;
32304         ft->data.intvalue = qual_type;
32305       }
32306     }
32307     if (ft == NULL) {
32308       /* try RNA Quals */
32309       cpy = StringSave (str);
32310       cp = StringChr (cpy, ' ');
32311       if (cp != NULL) {
32312         *cp = 0;
32313         feat_type = GetRnaTypeForName (cpy);
32314         qual_type = GetRnaFieldForName (cp + 1);
32315         if (feat_type > -1 && qual_type > -1) {
32316           rq = RnaQualNew ();
32317           rq->type = ValNodeNew (NULL);
32318           rq->type->choice = feat_type;
32319           rq->type->data.ptrvalue = NULL;
32320           rq->field = qual_type;
32321           ft = ValNodeNew (NULL);
32322           ft->choice = FieldType_rna_field;
32323           ft->data.ptrvalue = rq;
32324         }
32325       }
32326       cpy = MemFree (cpy);
32327     }
32328     if (ft == NULL && Matchnamestring (str, "comment-descriptor")) {
32329       ft = ValNodeNew (NULL);
32330       ft->choice = FieldType_misc;
32331       ft->data.intvalue = Misc_field_comment_descriptor;
32332     }
32333 
32334     /* try DBLink fields */
32335     if (ft == NULL)  {
32336       qual_type = GetDBLinkFieldTypeFromDBLinkName (str);
32337       if (qual_type > -1) {
32338         ft = ValNodeNew (NULL);
32339         ft->choice = FieldType_dblink;
32340         ft->data.intvalue = qual_type;
32341       }
32342     }
32343 
32344     /* try publication fields */
32345     if (ft == NULL) {
32346       qual_type = GetPubFieldFromLabel(str);
32347       if (qual_type > -1) {
32348         ft = ValNodeNew (NULL);
32349         ft->choice = FieldType_pub;
32350         ft->data.intvalue = qual_type;
32351       }
32352     }
32353     /* molinfo fields */
32354     if (ft == NULL) {
32355       if (StringsAreEquivalent(str, "completeness")) {
32356         molfield = ValNodeNew (NULL);
32357         molfield->choice = MolinfoField_completedness;
32358         ft = ValNodeNew (NULL);
32359         ft->choice = FieldType_molinfo_field;
32360         ft->data.ptrvalue = molfield;
32361       } else if (StringsAreEquivalent(str, "topology")) {
32362         molfield = ValNodeNew (NULL);
32363         molfield->choice = MolinfoField_topology;
32364         ft = ValNodeNew (NULL);
32365         ft->choice = FieldType_molinfo_field;
32366         ft->data.ptrvalue = molfield;
32367       }
32368     }
32369     /* location/genome */
32370     if (ft == NULL && StringsAreEquivalent(str, "location") || StringsAreEquivalent(str, "genome")) {
32371       vnp = ValNodeNew (NULL);
32372       vnp->choice = SourceQualValChoice_location;
32373       ft = ValNodeNew (NULL);
32374       ft->choice = FieldType_source_qual;
32375       ft->data.ptrvalue = vnp;
32376     }
32377 
32378   }
32379   return ft;
32380 }
32381 
32382 
IsFieldTypeNonText(ValNodePtr field_type)32383 NLM_EXTERN Boolean IsFieldTypeNonText (ValNodePtr field_type)
32384 {
32385   ValNodePtr      vnp;
32386   Boolean         rval = FALSE;
32387 
32388   if (field_type == NULL) {
32389     return FALSE;
32390   }
32391   switch (field_type->choice) {
32392     case FieldType_source_qual :
32393       vnp = (ValNodePtr) field_type->data.ptrvalue;
32394       if (vnp != NULL) {
32395         if (vnp->choice == SourceQualChoice_location || vnp->choice == SourceQualChoice_origin) {
32396           rval = TRUE;
32397         } else if (vnp->choice == SourceQualChoice_textqual) {
32398           if (IsNonTextSourceQual (vnp->data.intvalue)) {
32399             rval = TRUE;
32400           }
32401         }
32402       }
32403       break;
32404     case FieldType_molinfo_field :
32405       rval = TRUE;
32406       break;
32407   }
32408   return rval;
32409 }
32410 
32411 
SummarizeExistingText(Uint2 existing_text)32412 NLM_EXTERN CharPtr SummarizeExistingText (Uint2 existing_text)
32413 {
32414   CharPtr str = NULL;
32415 
32416   switch (existing_text) {
32417     case ExistingTextOption_append_semi :
32418       str = "append separated by semicolon";
32419       break;
32420     case ExistingTextOption_append_space :
32421       str = "append separated by space";
32422       break;
32423     case ExistingTextOption_append_colon :
32424       str = "append separated by colon";
32425       break;
32426     case ExistingTextOption_append_comma:
32427       str = "append separated by comma";
32428       break;
32429     case ExistingTextOption_append_none :
32430       str = "append (no separator)";
32431       break;
32432     case ExistingTextOption_prefix_semi :
32433       str = "prefix separated by semicolon";
32434       break;
32435     case ExistingTextOption_prefix_space :
32436       str = "prefix separated by space";
32437       break;
32438     case ExistingTextOption_prefix_colon :
32439       str = "prefix separated by colon";
32440       break;
32441     case ExistingTextOption_prefix_comma:
32442       str = "prefix separated by comma";
32443       break;
32444     case ExistingTextOption_prefix_none :
32445       str = "prefix (no separator)";
32446       break;
32447     case ExistingTextOption_leave_old :
32448       str = "ignore new text when existing text is present";
32449       break;
32450     case ExistingTextOption_replace_old :
32451       str = "overwrite existing text";
32452       break;
32453     case ExistingTextOption_add_qual :
32454       str = "add new qual";
32455       break;
32456     default:
32457       str = "invalid existing_text option";
32458       break;
32459   }
32460   return str;
32461 }
32462 
32463 
SummarizeTextMarker(TextMarkerPtr text_marker)32464 static CharPtr SummarizeTextMarker (TextMarkerPtr text_marker)
32465 {
32466   CharPtr summ = NULL;
32467 
32468   if (IsTextMarkerEmpty (text_marker)) {
32469     return NULL;
32470   } else if (text_marker->choice == TextMarker_free_text) {
32471     summ = StringSave (text_marker->data.ptrvalue);
32472   } else if (text_marker->choice == TextMarker_digits) {
32473     summ = StringSave ("numbers");
32474   } else if (text_marker->choice == TextMarker_letters) {
32475     summ = StringSave ("letters");
32476   }
32477   return summ;
32478 }
32479 
32480 
SummarizeTextPortion(TextPortionPtr text_portion)32481 NLM_EXTERN CharPtr SummarizeTextPortion (TextPortionPtr text_portion)
32482 {
32483   CharPtr summ = NULL;
32484   CharPtr left_fmt = NULL, right_fmt = NULL;
32485   CharPtr left_text = NULL, right_text = NULL;
32486   Int4 len = 6;
32487 
32488   if (text_portion == NULL
32489       || (IsTextMarkerEmpty (text_portion->left_marker)
32490           && IsTextMarkerEmpty (text_portion->right_marker))) {
32491     summ = StringSave ("entire text");
32492   } else {
32493     left_text = SummarizeTextMarker(text_portion->left_marker);
32494     right_text = SummarizeTextMarker(text_portion->right_marker);
32495 
32496     if (text_portion->inside) {
32497       if (left_text != NULL) {
32498         if (text_portion->include_left) {
32499           left_fmt = "starting with ";
32500         } else {
32501           left_fmt = "just after ";
32502         }
32503         len += StringLen (left_fmt) + StringLen (left_text) + 3;
32504       }
32505       if (right_text != NULL) {
32506         if (text_portion->include_right) {
32507           right_fmt = "up to and including ";
32508         } else {
32509           right_fmt = "up to ";
32510         }
32511         len += StringLen (right_fmt) + StringLen (right_text) + 3;
32512         if (left_fmt != NULL) {
32513           len += 2;
32514         }
32515       }
32516       if (left_fmt == NULL && right_fmt == NULL) {
32517         summ = StringSave ("entire text");
32518       } else {
32519         summ = (CharPtr) MemNew (sizeof (Char) * len);
32520         StringCat (summ, "text ");
32521         if (left_fmt != NULL) {
32522           StringCat (summ, left_fmt);
32523           StringCat (summ, "'");
32524           StringCat (summ, left_text);
32525           StringCat (summ, "'");
32526           if (right_fmt != NULL) {
32527             StringCat (summ, ", ");
32528           }
32529         }
32530         if (right_fmt != NULL) {
32531           StringCat (summ, right_fmt);
32532           StringCat (summ, "'");
32533           StringCat (summ, right_text);
32534           StringCat (summ, "'");
32535         }
32536       }
32537     } else {
32538       if (right_text != NULL) {
32539         if (text_portion->include_right) {
32540           right_fmt = "starting with ";
32541         } else {
32542           right_fmt = "after ";
32543         }
32544         len += StringLen (right_fmt) + StringLen (right_text) + 3;
32545       }
32546       if (left_text != NULL) {
32547         if (text_portion->include_left) {
32548           left_fmt = "up to and including ";
32549         } else {
32550           left_fmt = "before ";
32551         }
32552         len += StringLen (left_fmt) + StringLen (left_text) + 3;
32553         if (right_fmt != NULL) {
32554           len += 5;
32555         }
32556       }
32557 
32558       if (left_fmt == NULL && right_fmt == NULL) {
32559         summ = StringSave ("entire text");
32560       } else {
32561         summ = (CharPtr) MemNew (sizeof (Char) * len);
32562         StringCat (summ, "text ");
32563         if (right_fmt != NULL) {
32564           StringCat (summ, right_fmt);
32565           StringCat (summ, "'");
32566           StringCat (summ, right_text);
32567           StringCat (summ, "'");
32568           if (left_fmt != NULL) {
32569             StringCat (summ, " and ");
32570           }
32571         }
32572         if (left_fmt != NULL) {
32573           StringCat (summ, left_fmt);
32574           StringCat (summ, "'");
32575           StringCat (summ, left_text);
32576           StringCat (summ, "'");
32577         }
32578       }
32579     }
32580     left_text = MemFree (left_text);
32581     right_text = MemFree (right_text);
32582   }
32583   return summ;
32584 }
32585 
32586 
/* Label that SummarizeParseSrc reports for an organism parse source whose
 * field choice is ParseSrcOrgChoice_taxname_after_binomial.
 */
const CharPtr kTaxnameAfterBinomialString = "Taxname after binomial";
32588 
32589 
SummarizeParseSrcGeneralId(ValNodePtr vnp)32590 static CharPtr SummarizeParseSrcGeneralId (ValNodePtr vnp)
32591 {
32592   CharPtr summ = NULL;
32593   CharPtr fmt = "general ID %s tag";
32594 
32595   if (vnp == NULL) {
32596     return StringSave ("invalid id");
32597   }
32598   switch (vnp->choice) {
32599     case ParseSrcGeneralId_whole_text:
32600       summ = StringSave ("entire general ID");
32601       break;
32602     case ParseSrcGeneralId_db:
32603       summ = StringSave ("general ID database");
32604       break;
32605     case ParseSrcGeneralId_tag:
32606       if (vnp->data.ptrvalue == NULL || StringHasNoText (vnp->data.ptrvalue)) {
32607         summ = StringSave ("general ID tag");
32608       } else {
32609         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (vnp->data.ptrvalue)));
32610         sprintf (summ, fmt, vnp->data.ptrvalue);
32611       }
32612       break;
32613     default:
32614       summ = StringSave ("invalid id");
32615       break;
32616   }
32617   return summ;
32618 }
32619 
32620 
SummarizeParseSrc(ValNodePtr src)32621 NLM_EXTERN CharPtr SummarizeParseSrc (ValNodePtr src)
32622 {
32623   CharPtr summ = NULL;
32624   CharPtr fmt = "structured comment field %s";
32625   ParseSrcOrgPtr src_org;
32626   Boolean need_to_save = TRUE;
32627 
32628   if (src != NULL) {
32629     switch (src->choice) {
32630       case ParseSrc_defline:
32631         summ = "defline";
32632         break;
32633       case ParseSrc_flatfile:
32634         summ = "flat file";
32635         break;
32636       case ParseSrc_local_id:
32637         summ = "local ID";
32638         break;
32639       case ParseSrc_org:
32640         src_org = (ParseSrcOrgPtr) src->data.ptrvalue;
32641         if (src_org != NULL) {
32642           if (src_org->field != NULL) {
32643             if (src_org->field->choice == ParseSrcOrgChoice_taxname_after_binomial) {
32644               summ = kTaxnameAfterBinomialString;
32645             } else if (src_org->field->choice == ParseSrcOrgChoice_source_qual) {
32646               summ = GetSourceQualName (src_org->field->data.intvalue);
32647             }
32648           }
32649         }
32650         break;
32651       case ParseSrc_comment:
32652         summ = "comment";
32653         break;
32654       case ParseSrc_bankit_comment:
32655         summ = "BankIT comment";
32656         break;
32657       case ParseSrc_structured_comment:
32658         if (!StringHasNoText (src->data.ptrvalue)) {
32659           summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (src->data.ptrvalue) + StringLen (fmt)));
32660           sprintf (summ, fmt, src->data.ptrvalue);
32661           need_to_save = FALSE;
32662         }
32663         break;
32664       case ParseSrc_file_id:
32665         summ = "file ID";
32666         break;
32667       case ParseSrc_general_id:
32668         summ = SummarizeParseSrcGeneralId(src->data.ptrvalue);
32669         need_to_save = FALSE;
32670         break;
32671     }
32672   }
32673   if (summ == NULL) {
32674     summ = StringSave ("missing field");
32675   } else if (need_to_save) {
32676     summ = StringSave (summ);
32677   }
32678   return summ;
32679 }
32680 
32681 
SummarizeParseDst(ValNodePtr dst)32682 NLM_EXTERN CharPtr SummarizeParseDst (ValNodePtr dst)
32683 {
32684   CharPtr summ = NULL;
32685   CharPtr fmt = "%s %s";
32686   CharPtr feature, field;
32687   ParseDstOrgPtr dst_org;
32688   Boolean need_to_save = TRUE;
32689   FeatureFieldLegalPtr ffp;
32690 
32691   if (dst != NULL) {
32692     switch (dst->choice) {
32693       case ParseDest_defline:
32694         summ = "defline";
32695         break;
32696       case ParseDest_org:
32697         dst_org = (ParseDstOrgPtr) dst->data.ptrvalue;
32698         if (dst_org != NULL) {
32699           if (dst_org->field != NULL) {
32700             switch (dst_org->field->choice) {
32701               case SourceQualChoice_textqual:
32702                 summ = GetSourceQualName (dst_org->field->data.intvalue);
32703                 break;
32704               case SourceQualChoice_location:
32705                 summ = "location";
32706                 break;
32707               case SourceQualChoice_origin:
32708                 summ = "origin";
32709                 break;
32710             }
32711           }
32712         }
32713         break;
32714       case ParseDest_featqual:
32715         ffp = (FeatureFieldLegalPtr) dst->data.ptrvalue;
32716         if (ffp != NULL) {
32717           feature = GetFeatureNameFromFeatureType (ffp->type);
32718           field = GetFeatQualName (ffp->field);
32719           summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (feature) + StringLen (field)));
32720           sprintf (summ, fmt, feature, field);
32721           need_to_save = FALSE;
32722         }
32723         break;
32724       case ParseDest_dbxref:
32725         summ = "dbxref";
32726         break;
32727     }
32728   }
32729   if (summ == NULL) {
32730     summ = StringSave ("missing field");
32731   } else if (need_to_save) {
32732     summ = StringSave (summ);
32733   }
32734   return summ;
32735 }
32736 
32737 
32738 /* summarizing AECR actions */
/* SummarizeFieldPairType
 *
 * Describes a from/to field pair as "<from> <connect_word> <to>", prefixed
 * by the molinfo field name or the RNA type for those pair kinds.
 *
 * vnp          - the field pair (a FieldPairType choice); may be NULL
 * connect_word - word placed between the two field names; "to" is used
 *                when NULL is passed
 *
 * The result is always a freshly allocated string.  "missing field" is
 * returned when the pair, or data it needs, is absent; "Invalid field type"
 * is returned for an unrecognized choice.
 */
static CharPtr SummarizeFieldPairType (ValNodePtr vnp, CharPtr connect_word)
{
  FeatureFieldPairPtr ffp;
  CDSGeneProtFieldPairPtr cgp;
  SourceQualPairPtr     quals;
  MolinfoFieldPairPtr   m_fields;
  RnaQualPairPtr        rna_quals;
  CharPtr str = NULL;
  CharPtr from_label = NULL, to_label = NULL;
  CharPtr label_fmt = "%s %s %s";
  CharPtr type_label_fmt = "%s %s %s %s";
  CharPtr    label = NULL;

  if (connect_word == NULL) {
    connect_word = "to";
  }
  if (vnp == NULL) {
    str = StringSave ("missing field");
  } else {
    switch (vnp->choice) {
      case FieldPairType_source_qual:
        if (vnp->data.ptrvalue != NULL) {
          quals = (SourceQualPairPtr) vnp->data.ptrvalue;
          from_label = GetSourceQualName (quals->field_from);
          to_label = GetSourceQualName (quals->field_to);
        }
        if (from_label != NULL && to_label != NULL) {
          /* two separating spaces plus the terminating NUL */
          str = (CharPtr) MemNew (sizeof (Char) *
                                  (StringLen (from_label) + StringLen (connect_word) + StringLen (to_label)
                                  + 3));
          sprintf (str, "%s %s %s", from_label, connect_word, to_label);
        } else {
          str = StringSave ("missing field");
        }
        break;
      case FieldPairType_feature_field:
        ffp = (FeatureFieldPairPtr) vnp->data.ptrvalue;
        if (ffp == NULL || ffp->field_from == NULL || ffp->field_to == NULL) {
          str = StringSave ("missing field");
        } else {
          label = GetFeatureNameFromFeatureType (ffp->type);
          /* the labels are released below, unlike those in the other cases */
          from_label = FeatureFieldLabel (label, ffp->field_from);
          to_label = FeatureFieldLabel (label, ffp->field_to);
          str = (CharPtr) MemNew (sizeof (Char) *
                                  (StringLen (label_fmt)
                                   + StringLen (from_label) + StringLen (to_label)
                                   + StringLen (connect_word)));
          sprintf (str, label_fmt, from_label, connect_word, to_label);
          from_label = MemFree (from_label);
          to_label = MemFree (to_label);
        }
        break;
      case FieldPairType_cds_gene_prot:
        cgp = (CDSGeneProtFieldPairPtr) vnp->data.ptrvalue;
        /* guard against a pair with no payload instead of dereferencing NULL */
        if (cgp != NULL) {
          from_label = CDSGeneProtNameFromField (cgp->field_from);
          to_label = CDSGeneProtNameFromField (cgp->field_to);
        }
        if (from_label != NULL && to_label != NULL) {
          /* two separating spaces plus the terminating NUL */
          str = (CharPtr) MemNew (sizeof (Char) *
                                  (StringLen (from_label) + StringLen (connect_word) + StringLen (to_label)
                                  + 3));
          sprintf (str, "%s %s %s", from_label, connect_word, to_label);
        } else {
          str = StringSave ("missing field");
        }
        break;
      case FieldPairType_molinfo_field:
        m_fields = (MolinfoFieldPairPtr) vnp->data.ptrvalue;
        /* every branch below dereferences the inner pair, so both levels
         * must be present */
        if (m_fields == NULL || m_fields->data.ptrvalue == NULL) {
          str = StringSave ("missing field");
          break;
        }
        from_label = NULL;
        to_label = NULL;
        label = NULL;
        switch (m_fields->choice) {
          case MolinfoFieldPair_molecule:
            from_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->from));
            to_label = BiomolNameFromBiomol (BiomolFromMoleculeType (((MolinfoMoleculePairPtr) m_fields->data.ptrvalue)->to));
            label = "molecule";
            break;
          case MolinfoFieldPair_technique:
            from_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->from));
            to_label = TechNameFromTech (TechFromTechniqueType (((MolinfoTechniquePairPtr) m_fields->data.ptrvalue)->to));
            label = "technique";
            break;
          case MolinfoFieldPair_completedness:
            from_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->from));
            to_label = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (((MolinfoCompletednessPairPtr) m_fields->data.ptrvalue)->to));
            label = "completeness";
            break;
          case MolinfoFieldPair_mol_class:
            from_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->from));
            to_label = MolNameFromMol (MolFromMoleculeClassType (((MolinfoMolClassPairPtr) m_fields->data.ptrvalue)->to));
            label = "class";
            break;
          case MolinfoFieldPair_topology:
            from_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->from));
            to_label = TopologyNameFromTopology (TopologyFromTopologyType (((MolinfoTopologyPairPtr) m_fields->data.ptrvalue)->to));
            label = "topology";
            break;
          case MolinfoFieldPair_strand:
            from_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->from));
            to_label = StrandNameFromStrand (StrandFromStrandType (((MolinfoStrandPairPtr) m_fields->data.ptrvalue)->to));
            label = "strand";
            break;
        }
        if (from_label == NULL) {
          from_label = "Unknown value";
        }
        if (to_label == NULL) {
          to_label = "Unknown value";
        }
        if (label == NULL) {
          label = "Unknown molinfo field";
        }
        str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt)
                                                 + StringLen (label)
                                                 + StringLen (from_label)
                                                 + StringLen (to_label)
                                                 + StringLen (connect_word)));
        sprintf (str, type_label_fmt, label, from_label, connect_word, to_label);
        break;
      case FieldPairType_rna_field:
        if (vnp->data.ptrvalue != NULL) {
          rna_quals = (RnaQualPairPtr) vnp->data.ptrvalue;
          label = SummarizeRnaType (rna_quals->type);
          from_label = GetNameForRnaField (rna_quals->field_from);
          to_label = GetNameForRnaField (rna_quals->field_to);
        }
        if (from_label != NULL && to_label != NULL && label != NULL) {
          str = (CharPtr) MemNew (sizeof (Char) * (StringLen (type_label_fmt)
                                  + StringLen (label)
                                  + StringLen (from_label) + StringLen (connect_word) + StringLen (to_label)));
          sprintf (str, type_label_fmt, label, from_label, connect_word, to_label);
        } else {
          str = StringSave ("missing field");
        }
        /* the RNA type label is released here, unlike the other labels */
        label = MemFree (label);
        break;

      default:
        str = StringSave ("Invalid field type");
        break;
    }
  }
  return str;
}
32878 
SummarizeApplyAction(ApplyActionPtr a)32879 static CharPtr SummarizeApplyAction (ApplyActionPtr a)
32880 {
32881   CharPtr str = NULL;
32882   CharPtr fmt = "Apply %s to %s (%s)";
32883   CharPtr nontextqual_fmt = "Apply %s (%s)";
32884   CharPtr field, existing_text;
32885 
32886   if (a == NULL) {
32887     str = StringSave ("No action");
32888   } else if (a->value == NULL || a->field == NULL) {
32889     str = StringSave ("Invalid action");
32890   } else {
32891     field = SummarizeFieldType (a->field);
32892     existing_text = SummarizeExistingText (a->existing_text);
32893     if (IsFieldTypeNonText (a->field)) {
32894       str = (CharPtr) MemNew (sizeof (Char) * StringLen (nontextqual_fmt) + StringLen (field) + StringLen (existing_text));
32895       sprintf (str, nontextqual_fmt, field, existing_text);
32896     } else {
32897       str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (a->value) + StringLen (field) + StringLen (existing_text)));
32898       sprintf (str, fmt, a->value, field, existing_text);
32899     }
32900     field = MemFree (field);
32901   }
32902   return str;
32903 }
32904 
32905 
SummarizeEditAction(EditActionPtr a)32906 static CharPtr SummarizeEditAction (EditActionPtr a)
32907 {
32908   CharPtr str = NULL;
32909   CharPtr fmt = "Edit %s replace '%s'%s with '%s'";
32910   CharPtr case_insensitive = " (case insensitive)";
32911   CharPtr field;
32912   Int4    len;
32913 
32914   if (a == NULL) {
32915     str = StringSave ("No action");
32916   } else if (a->field == NULL || a->field == NULL || a->edit == NULL || a->edit->find_txt == NULL) {
32917     str = StringSave ("Invalid action");
32918   } else {
32919     field = SummarizeFieldType (a->field);
32920     len = StringLen (fmt) + StringLen (field) + StringLen (a->edit->find_txt) + StringLen (a->edit->repl_txt);
32921     if (a->edit->case_insensitive) {
32922       len += StringLen (case_insensitive);
32923     }
32924 
32925     str = (CharPtr) MemNew (sizeof (Char) * len);
32926     sprintf (str, fmt, field, a->edit->find_txt,
32927                               a->edit->case_insensitive ? case_insensitive : "",
32928                               a->edit->repl_txt == NULL ? "" : a->edit->repl_txt);
32929     field = MemFree (field);
32930   }
32931   return str;
32932 }
32933 
32934 
SummarizeRemoveOutsideAction(RemoveOutsideActionPtr a)32935 static CharPtr SummarizeRemoveOutsideAction (RemoveOutsideActionPtr a)
32936 {
32937   CharPtr str = NULL;
32938   CharPtr fmt = "Remove %s in %s";
32939   CharPtr case_insensitive = " (case insensitive)";
32940   CharPtr if_not_found = ", remove entire text if search text not found";
32941   CharPtr field, tmp;
32942   Int4    len;
32943 
32944   if (a == NULL) {
32945     str = StringSave ("No action");
32946   } else if (a->field == NULL || a->field == NULL || a->portion == NULL) {
32947     str = StringSave ("Invalid action");
32948   } else {
32949     field = SummarizeFieldType (a->field);
32950     tmp = SummarizeTextPortion (a->portion);
32951 
32952     len = StringLen (fmt) + StringLen (field) + StringLen (tmp);
32953     if (a->remove_if_not_found) {
32954       len += StringLen (if_not_found);
32955     }
32956 
32957     str = (CharPtr) MemNew (sizeof (Char) * len);
32958     sprintf (str, fmt, tmp, field);
32959     if (a->remove_if_not_found) {
32960       StringCat (str, if_not_found);
32961     }
32962     field = MemFree (field);
32963     tmp = MemFree (tmp);
32964   }
32965   return str;
32966 }
32967 
32968 
SummarizeConvertAction(ConvertActionPtr a)32969 static CharPtr SummarizeConvertAction (ConvertActionPtr a)
32970 {
32971   CharPtr str = NULL;
32972   CharPtr fmt = "Convert %s (%s)";
32973   CharPtr fields, existing_text;
32974 
32975   if (a == NULL) {
32976     str = StringSave ("No action");
32977   } else if (a->fields == NULL || a->fields == NULL) {
32978     str = StringSave ("Invalid action");
32979   } else {
32980     fields = SummarizeFieldPairType (a->fields, "to");
32981     existing_text = SummarizeExistingText (a->existing_text);
32982     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text)));
32983     sprintf (str, fmt, fields, existing_text);
32984     fields = MemFree (fields);
32985   }
32986   return str;
32987 }
32988 
32989 
SummarizeCopyAction(CopyActionPtr a)32990 static CharPtr SummarizeCopyAction (CopyActionPtr a)
32991 {
32992   CharPtr str = NULL;
32993   CharPtr fmt = "Copy %s (%s)";
32994   CharPtr fields, existing_text;
32995 
32996   if (a == NULL) {
32997     str = StringSave ("No action");
32998   } else if (a->fields == NULL) {
32999     str = StringSave ("Invalid action");
33000   } else {
33001     fields = SummarizeFieldPairType (a->fields, "to");
33002     existing_text = SummarizeExistingText (a->existing_text);
33003     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields) + StringLen (existing_text)));
33004     sprintf (str, fmt, fields, existing_text);
33005     fields = MemFree (fields);
33006   }
33007   return str;
33008 }
33009 
33010 
SummarizeSwapAction(SwapActionPtr a)33011 static CharPtr SummarizeSwapAction (SwapActionPtr a)
33012 {
33013   CharPtr str = NULL;
33014   CharPtr fmt = "Swap %s";
33015   CharPtr fields;
33016 
33017   if (a == NULL) {
33018     str = StringSave ("No action");
33019   } else if (a->fields == NULL) {
33020     str = StringSave ("Invalid action");
33021   } else {
33022     fields = SummarizeFieldPairType (a->fields, "with");
33023     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fields)));
33024     sprintf (str, fmt, fields);
33025     fields = MemFree (fields);
33026   }
33027   return str;
33028 }
33029 
33030 
SummarizeCapChange(Uint1 cap_change)33031 static CharPtr SummarizeCapChange (Uint1 cap_change)
33032 {
33033   CharPtr rval = NULL;
33034 
33035   switch (cap_change) {
33036     case Cap_change_tolower:
33037       rval = StringSave ("change capitalization to lower");
33038       break;
33039     case Cap_change_toupper:
33040       rval = StringSave ("change capitalization to upper");
33041       break;
33042     case Cap_change_firstcap:
33043       rval = StringSave ("capitalize first letter, remaining lower case");
33044       break;
33045     case Cap_change_firstcaprestnochange:
33046       rval = StringSave ("capitalize first letter, do not change other characters");
33047       break;
33048     case Cap_change_firstlower_restnochange:
33049       rval = StringSave ("lowercase first letter, do not change other characters");
33050       break;
33051     case Cap_change_cap_word_space:
33052       rval = StringSave ("capitalize first letter and letters after spaces");
33053       break;
33054     case Cap_change_cap_word_space_punc:
33055       rval = StringSave ("capitalize first letter and letters after spaces or punctuation");
33056       break;
33057   }
33058   return rval;
33059 }
33060 
33061 
SummarizeTextTransform(ValNodePtr transform)33062 NLM_EXTERN CharPtr SummarizeTextTransform (ValNodePtr transform)
33063 {
33064   FieldEditPtr edit;
33065   CharPtr      replace_fmt = "replace '%s' with '%s'";
33066   CharPtr      remove_fmt = "remove %s";
33067   CharPtr      case_insensitive = " (case insensitive)";
33068   CharPtr rval = NULL, tmp;
33069   Int4    len = 0;
33070 
33071   if (transform == NULL) {
33072     return NULL;
33073   }
33074 
33075   switch (transform->choice) {
33076     case TextTransform_edit:
33077       if ((edit = (FieldEditPtr) transform->data.ptrvalue) != NULL) {
33078         len = StringLen (replace_fmt) + StringLen (edit->find_txt) + StringLen (edit->repl_txt);
33079         if (edit->case_insensitive) {
33080           len += StringLen (case_insensitive);
33081         }
33082         rval = (CharPtr) MemNew (sizeof (Char) * len);
33083         sprintf (rval, replace_fmt, edit->find_txt == NULL ? "" : edit->find_txt, edit->repl_txt == NULL ? "" : edit->repl_txt);
33084         if (edit->case_insensitive) {
33085           StringCat (rval, case_insensitive);
33086         }
33087       }
33088       break;
33089     case TextTransform_caps:
33090       rval = SummarizeCapChange(transform->data.intvalue);
33091       break;
33092     case TextTransform_remove:
33093       tmp = SummarizeTextPortion (transform->data.ptrvalue);
33094       rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (remove_fmt) + StringLen (tmp)));
33095       sprintf (rval, remove_fmt, tmp);
33096       tmp = MemFree (tmp);
33097       break;
33098   }
33099   return rval;
33100 }
33101 
33102 
SummarizeTextTransformList(ValNodePtr text_transform)33103 static CharPtr SummarizeTextTransformList (ValNodePtr text_transform)
33104 {
33105   ValNodePtr str_list = NULL, vnp;
33106   Int4       len = 0;
33107   CharPtr    rval = NULL, tmp;
33108 
33109   for (vnp = text_transform; vnp != NULL; vnp = vnp->next) {
33110     tmp = SummarizeTextTransform (vnp);
33111     if (tmp != NULL) {
33112       ValNodeAddPointer (&str_list, 0, tmp);
33113       len += StringLen (tmp) + 3;
33114     }
33115   }
33116 
33117   rval = (CharPtr) MemNew (sizeof (Char) * len);
33118   for (vnp = str_list; vnp != NULL; vnp = vnp->next) {
33119     StringCat (rval, vnp->data.ptrvalue);
33120     if (vnp->next != NULL) {
33121       StringCat (rval, ", ");
33122     }
33123   }
33124   str_list = ValNodeFreeData (str_list);
33125   return rval;
33126 }
33127 
33128 
SummarizeAECRParseAction(AECRParseActionPtr a)33129 static CharPtr SummarizeAECRParseAction (AECRParseActionPtr a)
33130 {
33131   CharPtr str = NULL;
33132   CharPtr fmt = "Parse %s%s%s from %s(%s)";
33133   CharPtr fields, existing_text, text_portion, transform;
33134 
33135   if (a == NULL) {
33136     str = StringSave ("No action");
33137   } else if (a->fields == NULL) {
33138     str = StringSave ("Invalid action");
33139   } else {
33140     fields = SummarizeFieldPairType (a->fields, "to");
33141     existing_text = SummarizeExistingText (a->existing_text);
33142     text_portion = SummarizeTextPortion (a->portion);
33143     transform = SummarizeTextTransformList(a->transform);
33144     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (text_portion) + StringLen (transform) + StringLen (fields) + StringLen (existing_text)));
33145     sprintf (str, fmt, text_portion, transform == NULL ? "" : " ", transform == NULL ? "" : transform, fields, existing_text);
33146     fields = MemFree (fields);
33147     text_portion = MemFree (text_portion);
33148     transform = MemFree (transform);
33149   }
33150   return str;
33151 }
33152 
33153 
SummarizeRemoveAction(RemoveActionPtr a)33154 static CharPtr SummarizeRemoveAction (RemoveActionPtr a)
33155 {
33156   CharPtr str = NULL;
33157   CharPtr fmt = "Remove %s";
33158   CharPtr field;
33159 
33160   if (a == NULL) {
33161     str = StringSave ("No action");
33162   } else if (a->field == NULL || a->field == NULL) {
33163     str = StringSave ("Invalid action");
33164   } else {
33165     field = SummarizeFieldType (a->field);
33166     str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (field)));
33167     sprintf (str, fmt, field);
33168     field = MemFree (field);
33169   }
33170   return str;
33171 }
33172 
33173 
SummarizeAECRAction(AECRActionPtr a)33174 NLM_EXTERN CharPtr SummarizeAECRAction (AECRActionPtr a)
33175 {
33176   CharPtr str = NULL, act = NULL, constraint = NULL;
33177   if (a == NULL) {
33178     str = StringSave ("No action");
33179   } else if (a->action == NULL) {
33180     str = StringSave ("Invalid command");
33181   } else {
33182     switch (a->action->choice) {
33183       case ActionChoice_apply:
33184         act = SummarizeApplyAction (a->action->data.ptrvalue);
33185         break;
33186       case ActionChoice_edit:
33187         act = SummarizeEditAction (a->action->data.ptrvalue);
33188         break;
33189       case ActionChoice_remove_outside:
33190         act = SummarizeRemoveOutsideAction (a->action->data.ptrvalue);
33191         break;
33192       case ActionChoice_convert:
33193         act = SummarizeConvertAction (a->action->data.ptrvalue);
33194         break;
33195       case ActionChoice_copy:
33196         act = SummarizeCopyAction (a->action->data.ptrvalue);
33197         break;
33198       case ActionChoice_swap:
33199         act = SummarizeSwapAction (a->action->data.ptrvalue);
33200         break;
33201       case ActionChoice_remove:
33202         act = SummarizeRemoveAction (a->action->data.ptrvalue);
33203         break;
33204       case ActionChoice_parse:
33205         act = SummarizeAECRParseAction (a->action->data.ptrvalue);
33206         break;
33207     }
33208     if (act == NULL) {
33209       str = StringSave ("Invalid action");
33210     } else {
33211       constraint = SummarizeConstraintSet (a->constraint);
33212       if (constraint == NULL) {
33213         str = act;
33214       } else {
33215         str = (CharPtr) MemNew (sizeof (Char) * (StringLen(act) + 2 + StringLen (constraint)));
33216         sprintf (str, "%s %s", act, constraint);
33217         act = MemFree (act);
33218         constraint = MemFree (constraint);
33219       }
33220     }
33221   }
33222   return str;
33223 }
33224 
33225 
SummarizeParseAction(ParseActionPtr p)33226 NLM_EXTERN CharPtr SummarizeParseAction (ParseActionPtr p)
33227 {
33228   CharPtr field_from = NULL, field_to = NULL;
33229   CharPtr existing_text = NULL, text_portion = NULL, transform;
33230   CharPtr summ = NULL;
33231   CharPtr fmt = "Parse %s from %s to %s%s%s (%s)";
33232 
33233   if (p == NULL) {
33234     summ = StringSave ("No action");
33235   } else {
33236     field_from = SummarizeParseSrc (p->src);
33237     field_to = SummarizeParseDst (p->dest);
33238     existing_text = SummarizeExistingText (p->existing_text);
33239     text_portion = SummarizeTextPortion (p->portion);
33240     transform = SummarizeTextTransformList(p->transform);
33241 
33242     summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt)
33243                              + StringLen (text_portion)
33244                              + StringLen (field_from)
33245                              + StringLen (field_to)
33246                              + StringLen (transform)
33247                              + StringLen (existing_text)));
33248     sprintf (summ, fmt, text_portion, field_from, field_to, transform == NULL ? "" : " ", transform == NULL ? "" : transform, existing_text);
33249     text_portion = MemFree (text_portion);
33250     field_from = MemFree (field_from);
33251     field_to = MemFree (field_to);
33252   }
33253   return summ;
33254 }
33255 
33256 
SummarizeAutodefClauseListType(Uint2 clause_list_type)33257 static CharPtr SummarizeAutodefClauseListType (Uint2 clause_list_type)
33258 {
33259   CharPtr str = "complete sequence";
33260 
33261   switch (clause_list_type) {
33262     case Autodef_list_type_feature_list:
33263       str = "list features";
33264       break;
33265     case Autodef_list_type_complete_sequence:
33266       str = "complete sequence";
33267       break;
33268     case Autodef_list_type_complete_genome:
33269       str = "complete genome";
33270       break;
33271     case Autodef_list_type_sequence:
33272       str = "sequence";
33273       break;
33274   }
33275   return str;
33276 }
33277 
33278 
SummarizeAutodefAction(AutodefActionPtr autodef)33279 NLM_EXTERN CharPtr SummarizeAutodefAction (AutodefActionPtr autodef)
33280 {
33281   CharPtr    label = NULL, mod_name;
33282   CharPtr    str = NULL;
33283   CharPtr    fmt = "Autodef %s";
33284   CharPtr    modifiers_fmt = " with modifier";
33285   CharPtr    misc_feat_rule = NULL;
33286   Int4       len;
33287   ValNodePtr mod_names = NULL, vnp;
33288 
33289   if (autodef == NULL) {
33290     str = StringSave ("No action");
33291   } else {
33292     label = SummarizeAutodefClauseListType (autodef->clause_list_type);
33293     if (autodef->clause_list_type == Autodef_list_type_feature_list) {
33294       if (autodef->misc_feat_parse_rule == 1) {
33295         misc_feat_rule = ", use misc-feat comment before first semicolon";
33296       } else if (autodef->misc_feat_parse_rule == 2) {
33297         misc_feat_rule = ", look for non-coding product in misc-feat comment";
33298       }
33299     }
33300     len = StringLen (fmt) + StringLen (label) + StringLen (misc_feat_rule);
33301     if (autodef->modifiers != NULL) {
33302       len += StringLen (modifiers_fmt) + 2;
33303       for (vnp = autodef->modifiers; vnp != NULL; vnp = vnp->next) {
33304         mod_name = GetSourceQualName (vnp->data.intvalue);
33305         len += StringLen (mod_name) + 3;
33306         ValNodeAddPointer (&mod_names, 0, mod_name);
33307       }
33308     }
33309 
33310     str = (CharPtr) MemNew (sizeof (Char) * (len + 1));
33311     sprintf (str, fmt, label);
33312 
33313     if (autodef->modifiers != NULL) {
33314       StringCat (str, modifiers_fmt);
33315       if (autodef->modifiers->next != NULL) {
33316         StringCat (str, "s");
33317       }
33318       for (vnp = mod_names; vnp != NULL; vnp = vnp->next) {
33319         StringCat (str, " ");
33320         StringCat (str, vnp->data.ptrvalue);
33321         if (vnp->next != NULL) {
33322           StringCat (str, ",");
33323         }
33324       }
33325     }
33326 
33327     mod_names = ValNodeFree (mod_names);
33328 
33329     if (misc_feat_rule != NULL) {
33330       StringCat (str, misc_feat_rule);
33331     }
33332   }
33333 
33334   return str;
33335 }
33336 
33337 
SummarizeRemoveDescriptorAction(RemoveDescriptorActionPtr a)33338 NLM_EXTERN CharPtr SummarizeRemoveDescriptorAction (RemoveDescriptorActionPtr a)
33339 {
33340   CharPtr    label = NULL;
33341   CharPtr    constraint, str;
33342   CharPtr    fmt = "Remove %s";
33343   CharPtr    constraint_fmt = "Remove %s descriptors %s";
33344 
33345   if (a == NULL) {
33346     str = StringSave ("No action");
33347   } else {
33348     label = GetDescriptorNameFromDescriptorType (a->type);
33349     constraint = SummarizeConstraintSet (a->constraint);
33350     if (constraint == NULL) {
33351       str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label)));
33352       sprintf (str, fmt, label);
33353     } else {
33354       str = (CharPtr) MemNew (sizeof (Char) * (StringLen (constraint_fmt) + StringLen (label) + StringLen (constraint)));
33355       sprintf (str, constraint_fmt, label, constraint);
33356       constraint = MemFree (constraint);
33357     }
33358   }
33359 
33360   return str;
33361 }
33362 
33363 
SummarizeFixPubCapsAction(FixPubCapsActionPtr a)33364 NLM_EXTERN CharPtr SummarizeFixPubCapsAction (FixPubCapsActionPtr a)
33365 {
33366   CharPtr constraint = NULL;
33367   Int4    len = 0;
33368   CharPtr descriptions[] = {"affiliation", "title", "authors", "affiliation country"};
33369   CharPtr punct_only = " (punctuation only)";
33370   Boolean present[4];
33371   Int4    i, first = 4, last = 0, num_items = 0;
33372   CharPtr summ = NULL;
33373 
33374   if (a == NULL) {
33375     return NULL;
33376   }
33377 
33378   present[0] = a->affiliation;
33379   present[1] = a->title;
33380   present[2] = a->authors;
33381   present[3] = a->affil_country;
33382 
33383   for (i = 0; i < 4; i++) {
33384     if (present[i]) {
33385       len += 6 + StringLen (descriptions[i]);
33386       if (first == 4) {
33387         first = i;
33388       }
33389       last = i;
33390       num_items++;
33391     }
33392   }
33393 
33394   if (len > 0) {
33395     if (a->punct_only) {
33396       len += StringLen (punct_only);
33397     }
33398     constraint = SummarizeConstraintSet (a->constraint);
33399     len += StringLen (constraint) + 14;
33400     summ = (CharPtr) MemNew (sizeof (Char) * len);
33401     sprintf (summ, "Fix pub ");
33402     for (i = 0; i < 4; i++) {
33403       if (present[i]) {
33404         if (i != first) {
33405           if (num_items > 2) {
33406             StringCat (summ, ", ");
33407           }
33408           if (i == last) {
33409             StringCat (summ, " and ");
33410           }
33411         }
33412         StringCat (summ, descriptions[i]);
33413       }
33414     }
33415     if (a->punct_only) {
33416       StringCat (summ, punct_only);
33417     }
33418     if (constraint != NULL) {
33419       StringCat (summ, " where ");
33420       StringCat (summ, constraint);
33421     }
33422     constraint = MemFree (constraint);
33423   }
33424 
33425   return summ;
33426 }
33427 
33428 
SummarizeFixAuthorCaps(FixAuthorCapsPtr action)33429 static CharPtr SummarizeFixAuthorCaps (FixAuthorCapsPtr action)
33430 {
33431   if (action == NULL) {
33432     return StringSave ("Invalid action");
33433   } else if (action->last_name_only) {
33434     return StringSave ("Fix capitalization in author last names where last name is all caps");
33435   } else {
33436     return StringSave ("Fix capitalization in author name where name is all caps");
33437   }
33438 }
33439 
33440 
SummarizeFixCapsAction(FixCapsActionPtr action)33441 NLM_EXTERN CharPtr SummarizeFixCapsAction (FixCapsActionPtr action)
33442 {
33443   CharPtr summ = NULL, tmp;
33444   CharPtr fmt = "Fix capitalization in %s source qualifier";
33445 
33446   if (action == NULL) {
33447     summ = StringSave ("Invalid action");
33448   } else {
33449     switch (action->choice) {
33450       case FixCapsAction_pub:
33451         summ = SummarizeFixPubCapsAction (action->data.ptrvalue);
33452         break;
33453       case FixCapsAction_src_country:
33454         summ = StringSave ("Fix source country qualifier capitalization");
33455         break;
33456       case FixCapsAction_mouse_strain:
33457         summ = StringSave ("Fix capitalization in common Mus musculus strains");
33458         break;
33459       case FixCapsAction_src_qual:
33460         tmp = GetSourceQualName (action->data.intvalue);
33461         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt)));
33462         sprintf (summ, fmt, tmp);
33463         break;
33464       case FixCapsAction_author:
33465         summ = SummarizeFixAuthorCaps (action->data.ptrvalue);
33466         break;
33467       default:
33468         summ = StringSave ("Invalid action");
33469         break;
33470     }
33471   }
33472   return summ;
33473 }
33474 
33475 
SummarizeFixFormatAction(FixFormatActionPtr action)33476 NLM_EXTERN CharPtr SummarizeFixFormatAction (FixFormatActionPtr action)
33477 {
33478   CharPtr summ = NULL;
33479   if (action == NULL) {
33480     summ = StringSave ("Invalid action");
33481   } else {
33482     switch (action->choice) {
33483       case FixFormatAction_collection_date:
33484         summ = StringSave ("Fix collection-date format");
33485         break;
33486       case FixFormatAction_lat_lon:
33487         summ = StringSave ("Fix lat-lon format");
33488         break;
33489       case FixFormatAction_primers:
33490         summ = StringSave ("Fix i in primer sequence");
33491         break;
33492       case FixFormatAction_protein_name:
33493         summ = StringSave ("Remove organism names from protein names");
33494         break;
33495       default:
33496         summ = StringSave ("Invalid action");
33497         break;
33498     }
33499   }
33500   return summ;
33501 }
33502 
33503 
SummarizeRemoveDuplicateFeaturesAction(RemoveDuplicateFeatureActionPtr action)33504 NLM_EXTERN CharPtr SummarizeRemoveDuplicateFeaturesAction (RemoveDuplicateFeatureActionPtr action)
33505 {
33506   CharPtr summ = NULL;
33507   CharPtr start_fmt = "Remove duplicate%s%s features";
33508   CharPtr feat_type;
33509   CharPtr case_sensitive = "(case-sensitive)";
33510   CharPtr ignore_partials = "(ignore partials)";
33511   CharPtr remove_proteins = " and remove protein products";
33512 
33513   Int4    len = 0;
33514 
33515   if (action == NULL) {
33516     summ = StringSave ("Invalid action");
33517   } else {
33518     len = StringLen (start_fmt);
33519     if (action->type == Macro_feature_type_any) {
33520       feat_type = "";
33521     } else {
33522       feat_type = GetFeatureNameFromFeatureType (action->type);
33523     }
33524     len += StringLen (feat_type) + 1;
33525     if (action->case_sensitive) {
33526       len += StringLen (case_sensitive);
33527     }
33528     if (action->ignore_partials) {
33529       len += StringLen (ignore_partials);
33530     }
33531     if (action->remove_proteins) {
33532       len += StringLen (remove_proteins);
33533     }
33534 
33535     summ = (CharPtr) MemNew (sizeof (Char) * len);
33536     sprintf (summ, start_fmt, action->type == Macro_feature_type_any ? "" : " ", feat_type);
33537     if (action->case_sensitive) {
33538       StringCat (summ, case_sensitive);
33539     }
33540     if (action->ignore_partials) {
33541       StringCat (summ, ignore_partials);
33542     }
33543     if (action->remove_proteins) {
33544       StringCat (summ, remove_proteins);
33545     }
33546   }
33547   return summ;
33548 }
33549 
33550 
33551 
GetSortOrderName(Uint2 order)33552 NLM_EXTERN CharPtr GetSortOrderName (Uint2 order)
33553 {
33554   CharPtr rval = NULL;
33555 
33556   switch (order) {
33557     case Sort_order_short_to_long:
33558       rval = "by length, short to long";
33559       break;
33560     case Sort_order_long_to_short:
33561       rval = "by length, long to short";
33562       break;
33563     case Sort_order_alphabetical:
33564       rval = "alphabetically";
33565       break;
33566     default:
33567       rval = "unknown order";
33568       break;
33569   }
33570   return rval;
33571 }
33572 
33573 
SummarizeSortFieldsAction(SortFieldsActionPtr action)33574 NLM_EXTERN CharPtr SummarizeSortFieldsAction (SortFieldsActionPtr action)
33575 {
33576   CharPtr label, order, constraint, summ;
33577   CharPtr fmt = "Sort %s fields %s%s%s";
33578 
33579   label = SummarizeFieldType (action->field);
33580   order = GetSortOrderName(action->order);
33581   constraint = SummarizeConstraintSet (action->constraint);
33582 
33583   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen(order) + StringLen (constraint)));
33584   sprintf (summ, fmt, label, order, constraint == NULL ? "" : " where ", constraint == NULL ? "" : constraint);
33585   label = MemFree (label);
33586   constraint = MemFree (constraint);
33587 
33588   return summ;
33589 }
33590 
33591 
SummarizeMolinfoBlockAction(MolinfoBlockPtr mib)33592 NLM_EXTERN CharPtr SummarizeMolinfoBlockAction (MolinfoBlockPtr mib)
33593 {
33594   CharPtr field_label, constraint, summ;
33595   ValNodePtr field, field_strs = NULL, from_strs = NULL, vnp;
33596   Int4 len = 11;
33597   Int4 num_from = 0;
33598   Int4 num_to = 0;
33599 
33600   if (mib == NULL) {
33601     return NULL;
33602   }
33603 
33604 
33605   for (field = mib->to_list; field != NULL; field = field->next) {
33606     field_label = GetSequenceQualName (field);
33607     ValNodeAddPointer (&field_strs, 0, field_label);
33608     len += StringLen (field_label) + 2;
33609     num_to++;
33610   }
33611 
33612   for (field = mib->from_list; field != NULL; field = field->next) {
33613     field_label = GetSequenceQualName (field);
33614     ValNodeAddPointer (&from_strs, 0, field_label);
33615     len += StringLen (field_label) + 2;
33616     num_from++;
33617   }
33618 
33619   constraint = SummarizeConstraintSet (mib->constraint);
33620   len += StringLen (constraint);
33621   if (constraint != NULL || num_from > 0) {
33622     len += 12;
33623   }
33624 
33625   if (num_to > 1) {
33626     len += 5;
33627   }
33628   if (num_from > 1) {
33629     len += 5;
33630   }
33631 
33632   summ = (CharPtr) MemNew (sizeof (Char) * len);
33633   sprintf (summ, "Change to ");
33634   for (vnp = field_strs; vnp != NULL; vnp = vnp->next) {
33635     StringCat (summ, vnp->data.ptrvalue);
33636     if (vnp->next != NULL) {
33637       if (num_to > 2) {
33638         if (vnp->next->next == NULL) {
33639           StringCat (summ, ", and");
33640         } else {
33641           StringCat (summ, ", ");
33642         }
33643       } else {
33644         StringCat (summ, " and ");
33645       }
33646     }
33647   }
33648 
33649   if (num_from > 0 || constraint != NULL) {
33650     StringCat (summ, " where ");
33651   }
33652 
33653   for (vnp = from_strs; vnp != NULL; vnp = vnp->next) {
33654     StringCat (summ, vnp->data.ptrvalue);
33655     if (vnp->next != NULL) {
33656       if (num_from > 2) {
33657         if (vnp->next->next == NULL && constraint == NULL) {
33658           StringCat (summ, ", and");
33659         } else {
33660           StringCat (summ, ", ");
33661         }
33662       } else if (constraint == NULL) {
33663         StringCat (summ, " and ");
33664       } else {
33665         StringCat (summ, ", ");
33666       }
33667     }
33668   }
33669 
33670   if (constraint != NULL && num_from > 0) {
33671     StringCat (summ, " and ");
33672   }
33673 
33674   StringCat (summ, constraint);
33675 
33676   field_strs = ValNodeFreeData (field_strs);
33677   from_strs = ValNodeFreeData (from_strs);
33678   constraint = MemFree (constraint);
33679 
33680   return summ;
33681 
33682 }
33683 
33684 
33685 /* summarizing constraints */
GetStringLocationPhrase(Uint2 match_location,Boolean not_present)33686 static CharPtr GetStringLocationPhrase (Uint2 match_location, Boolean not_present)
33687 {
33688   CharPtr location_word = NULL;
33689 
33690   switch (match_location) {
33691     case String_location_contains :
33692       if (not_present) {
33693         location_word = "does not contain";
33694       } else {
33695         location_word = "contains";
33696       }
33697       break;
33698     case String_location_equals :
33699       if (not_present) {
33700         location_word = "does not equal";
33701       } else {
33702         location_word = "equals";
33703       }
33704       break;
33705     case String_location_starts :
33706       if (not_present) {
33707         location_word = "does not start with";
33708       } else {
33709         location_word = "starts with";
33710       }
33711       break;
33712     case String_location_ends :
33713       if (not_present) {
33714         location_word = "does not end with";
33715       } else {
33716         location_word = "ends with";
33717       }
33718       break;
33719     case String_location_inlist :
33720       if (not_present) {
33721         location_word = "is not one of";
33722       } else {
33723         location_word = "is one of";
33724       }
33725       break;
33726   }
33727   return location_word;
33728 }
33729 
33730 
/* Option labels shared by SummarizeWordSubstitution and
 * SummarizeStringConstraintEx when listing match options. */
static const CharPtr kCaseSensitive = "case-sensitive";
static const CharPtr kWholeWord = "whole word";
33733 
SummarizeWordSubstitution(WordSubstitutionPtr word)33734 NLM_EXTERN CharPtr SummarizeWordSubstitution (WordSubstitutionPtr word)
33735 {
33736   CharPtr fmt = "allow '%s' to be replaced by '%s'";
33737   Int4    len = 0;
33738   ValNodePtr vnp;
33739   CharPtr summ = NULL;
33740 
33741   if (word == NULL || word->synonyms == NULL) {
33742     return NULL;
33743   }
33744 
33745   len = StringLen (fmt) + StringLen (word->word);
33746   for (vnp = word->synonyms; vnp != NULL; vnp = vnp->next) {
33747     len += StringLen (vnp->data.ptrvalue) + 4;
33748   }
33749 
33750   if (word->case_sensitive) {
33751     len += StringLen (kCaseSensitive) + 3;
33752   }
33753   if (word->whole_word) {
33754     len += StringLen (kWholeWord) + 3;
33755   }
33756 
33757 
33758   summ = (CharPtr) MemNew (sizeof (Char) * len);
33759   sprintf (summ, fmt,
33760            word->word == NULL ? "" : word->word,
33761            (word->synonyms == NULL || word->synonyms->data.ptrvalue == NULL) ? "" : word->synonyms->data.ptrvalue);
33762   if (word->synonyms != NULL) {
33763     for (vnp = word->synonyms->next; vnp != NULL; vnp = vnp->next) {
33764       if (word->synonyms->next->next != NULL) {
33765         StringCat (summ, ",");
33766       }
33767       StringCat (summ, " ");
33768       if (vnp->next == NULL) {
33769         StringCat (summ, "and ");
33770       }
33771       StringCat (summ, "'");
33772       if (vnp->data.ptrvalue != NULL) {
33773         StringCat (summ, vnp->data.ptrvalue);
33774       }
33775       StringCat (summ, "'");
33776     }
33777   }
33778   if (word->case_sensitive) {
33779     StringCat (summ, ", ");
33780     StringCat (summ, kCaseSensitive);
33781   }
33782   if (word->whole_word) {
33783     StringCat (summ, ", ");
33784     StringCat (summ, kWholeWord);
33785   }
33786 
33787   return summ;
33788 }
33789 
33790 
SummarizeStringConstraintEx(StringConstraintPtr constraint,Boolean short_version)33791 NLM_EXTERN CharPtr SummarizeStringConstraintEx (StringConstraintPtr constraint, Boolean short_version)
33792 {
33793   CharPtr location_word = NULL;
33794   CharPtr ignore_space = "ignore spaces";
33795   CharPtr ignore_punct = "ignore punctuation";
33796   CharPtr ignore_weasel = "ignore 'putative' synonyms";
33797   CharPtr str = NULL;
33798   Int4 len;
33799   CharPtr fmt = "%s '%s'";
33800   Boolean has_extra = FALSE;
33801   WordSubstitutionPtr word;
33802   ValNodePtr subst_words = NULL, vnp;
33803   CharPtr tmp;
33804 
33805   if (IsStringConstraintEmpty (constraint)) return NULL;
33806 
33807   if (constraint->match_text != NULL) {
33808     location_word = GetStringLocationPhrase (constraint->match_location, constraint->not_present);
33809     if (location_word == NULL) return NULL;
33810     len = StringLen (location_word) + StringLen (constraint->match_text) + StringLen (fmt);
33811 
33812     if (!short_version) {
33813       if (constraint->case_sensitive) {
33814         len += StringLen (kCaseSensitive) + 3;
33815       }
33816       if (constraint->whole_word) {
33817         len += StringLen (kWholeWord) + 3;
33818       }
33819       if (constraint->ignore_space) {
33820         len += StringLen (ignore_space) + 3;
33821       }
33822       if (constraint->ignore_punct) {
33823         len += StringLen (ignore_punct) + 3;
33824       }
33825       if (constraint->ignore_weasel) {
33826         len += StringLen (ignore_weasel) + 3;
33827       }
33828 
33829       /* allocate space for substitution phrases */
33830       for (word = constraint->ignore_words; word != NULL; word = word->next) {
33831         tmp = SummarizeWordSubstitution (word);
33832         if (tmp != NULL) {
33833           ValNodeAddPointer (&subst_words, 0, tmp);
33834           len += StringLen (tmp) + 2;
33835         }
33836       }
33837     }
33838 
33839     str = (CharPtr) MemNew (sizeof (Char) * len);
33840     sprintf (str, fmt, location_word, constraint->match_text);
33841     if (!short_version) {
33842       if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) {
33843         StringCat (str, " (");
33844       }
33845       if (constraint->case_sensitive) {
33846         StringCat (str, kCaseSensitive);
33847         has_extra = TRUE;
33848       }
33849       if (constraint->whole_word) {
33850         if (has_extra) {
33851           StringCat (str, ", ");
33852         }
33853         StringCat (str, kWholeWord);
33854         has_extra = TRUE;
33855       }
33856       if (constraint->ignore_space) {
33857         if (has_extra) {
33858           StringCat (str, ", ");
33859         }
33860         StringCat (str, ignore_space);
33861         has_extra = TRUE;
33862       }
33863       if (constraint->ignore_punct) {
33864         if (has_extra) {
33865           StringCat (str, ", ");
33866         }
33867         StringCat (str, ignore_punct);
33868         has_extra = TRUE;
33869       }
33870       if (constraint->ignore_weasel) {
33871         if (has_extra) {
33872           StringCat (str, ", ");
33873         }
33874         StringCat (str, ignore_weasel);
33875         has_extra = TRUE;
33876       }
33877 
33878       if (constraint->case_sensitive || constraint->whole_word || constraint->ignore_space || constraint->ignore_punct) {
33879         StringCat (str, ")");
33880       }
33881 
33882       for (vnp = subst_words; vnp != NULL; vnp = vnp->next) {
33883         StringCat (str, ", ");
33884         StringCat (str, vnp->data.ptrvalue);
33885       }
33886 
33887       subst_words = ValNodeFreeData (subst_words);
33888     }
33889   }
33890   if (constraint->is_all_caps) {
33891     SetStringValue(&str, "all letters are uppercase", ExistingTextOption_append_comma);
33892   }
33893   if (constraint->is_all_lower) {
33894     SetStringValue(&str, "all letters are lowercase", ExistingTextOption_append_comma);
33895   }
33896   if (constraint->is_all_punct) {
33897     SetStringValue(&str, "all characters are punctuation", ExistingTextOption_append_comma);
33898   }
33899 
33900   return str;
33901 }
33902 
33903 
/* Convenience wrapper: summarizes a string constraint by forwarding to
 * SummarizeStringConstraintEx with short_version set to FALSE, and returns
 * whatever that call returns. */
NLM_EXTERN CharPtr SummarizeStringConstraint (StringConstraintPtr constraint)
{
  return SummarizeStringConstraintEx (constraint, FALSE);
}
33908 
33909 
SummarizePartialnessForLocationConstraint(LocationConstraintPtr constraint)33910 static CharPtr SummarizePartialnessForLocationConstraint (LocationConstraintPtr constraint)
33911 {
33912   if (constraint == NULL
33913       || (constraint->partial5 == Partial_constraint_either
33914       && constraint->partial3 == Partial_constraint_either)) {
33915     return NULL;
33916   }
33917   if (constraint->partial5 == Partial_constraint_either) {
33918     if (constraint->partial3 == Partial_constraint_partial) {
33919       return "that are 3' partial";
33920     } else {
33921       return "that are 3' complete";
33922     }
33923   } else if (constraint->partial3 == Partial_constraint_either) {
33924     if (constraint->partial5 == Partial_constraint_partial) {
33925       return "that are 5' partial";
33926     } else {
33927       return "that are 5' complete";
33928     }
33929   } else if (constraint->partial5 == Partial_constraint_partial
33930              && constraint->partial3 == Partial_constraint_partial) {
33931     return "that are partial on both ends";
33932   } else if (constraint->partial5 == Partial_constraint_complete
33933              && constraint->partial3 == Partial_constraint_complete) {
33934     return "that are complete on both ends";
33935   } else if (constraint->partial5 == Partial_constraint_complete
33936              && constraint->partial3 == Partial_constraint_partial) {
33937     return "that are 5' complete and 3' partial";
33938   } else if (constraint->partial5 == Partial_constraint_partial
33939              && constraint->partial3 == Partial_constraint_complete) {
33940     return "that are 5' partial and 3' complete";
33941   } else {
33942     return NULL;
33943   }
33944 }
33945 
33946 
SummarizeLocationType(LocationConstraintPtr constraint)33947 static CharPtr SummarizeLocationType (LocationConstraintPtr constraint)
33948 {
33949   if (constraint == NULL
33950       || constraint->location_type == Location_type_constraint_any) {
33951     return NULL;
33952   } else if (constraint->location_type == Location_type_constraint_single_interval) {
33953     return "with single interval";
33954   } else if (constraint->location_type == Location_type_constraint_joined) {
33955     return "with joined intervals";
33956   } else if (constraint->location_type == Location_type_constraint_ordered) {
33957     return "with ordered intervals";
33958   } else {
33959     return NULL;
33960   }
33961 }
33962 
33963 
/* Comparison phrases used by SummarizeEndDistance, indexed by the choice
 * value of the end-distance ValNode (1..3); slot 0 is a NULL placeholder. */
static CharPtr distance_words[] = { NULL, "exactly", "no more than", "no less than" };
33965 
/* Describes an end-distance requirement:
 *   "with <end_name> <exactly|no more than|no less than> <n> from end of sequence".
 * vnp->choice selects the comparison phrase and vnp->data.intvalue is the
 * distance.
 *
 * Returns a MemNew-allocated string, or NULL when vnp is NULL or its choice
 * is outside 1..3.
 */
static CharPtr SummarizeEndDistance (ValNodePtr vnp, CharPtr end_name)
{
  CharPtr fmt = "with %s %s %d from end of sequence";
  CharPtr comparison;
  CharPtr result;

  if (vnp == NULL) {
    return NULL;
  }
  if (vnp->choice < 1 || vnp->choice > 3) {
    return NULL;
  }

  comparison = distance_words[vnp->choice];

  /* 15 extra characters are reserved for the printed integer */
  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (end_name) + StringLen (comparison) + 15));
  sprintf (result, fmt, end_name, comparison, vnp->data.intvalue);

  return result;
}
33980 
33981 
SummarizeLocationConstraint(LocationConstraintPtr constraint)33982 static CharPtr SummarizeLocationConstraint (LocationConstraintPtr constraint)
33983 {
33984   CharPtr str = NULL;
33985   CharPtr strand_word = NULL, seq_word = NULL;
33986   CharPtr fmt = "only objects";
33987   CharPtr partial;
33988   CharPtr location_type;
33989   CharPtr dist5 = NULL, dist3 = NULL;
33990   Int4    len = 0;
33991 
33992   if (IsLocationConstraintEmpty (constraint)) {
33993     return NULL;
33994   }
33995 
33996   partial = SummarizePartialnessForLocationConstraint (constraint);
33997   location_type = SummarizeLocationType(constraint);
33998   dist5 = SummarizeEndDistance (constraint->end5, "5' end");
33999   dist3 = SummarizeEndDistance (constraint->end3, "3' end");
34000 
34001   if (constraint->seq_type == Seqtype_constraint_nuc) {
34002     seq_word = "nucleotide sequences";
34003   } else if (constraint->seq_type == Seqtype_constraint_prot) {
34004     seq_word = "protein sequences";
34005   }
34006 
34007   if (constraint->strand == Strand_constraint_plus) {
34008     strand_word = " on plus strands";
34009   } else if (constraint->strand == Strand_constraint_minus) {
34010     strand_word = " on minus strands";
34011   }
34012 
34013   len = StringLen (fmt) + 1;
34014   if (strand_word != NULL) {
34015     len += StringLen (strand_word);
34016   }
34017   if (seq_word != NULL) {
34018     len += StringLen (seq_word) + 4;
34019   }
34020   if (partial != NULL) {
34021     len += StringLen (partial) + 2;
34022   }
34023   if (location_type != NULL) {
34024     len += StringLen (location_type) + 2;
34025   }
34026   if (dist5 != NULL) {
34027     len += StringLen (dist5) + 1;
34028   }
34029   if (dist3 != NULL) {
34030     len += StringLen (dist3) + 1;
34031   }
34032   str = (CharPtr) MemNew (sizeof (Char) * len);
34033   sprintf (str, "%s", fmt);
34034   if (strand_word == NULL && seq_word != NULL) {
34035     StringCat (str, " on ");
34036     StringCat (str, seq_word);
34037   } else if (strand_word != NULL) {
34038     StringCat (str, strand_word);
34039     if (seq_word != NULL) {
34040       StringCat (str, " of ");
34041       StringCat (str, seq_word);
34042     }
34043   }
34044   if (partial != NULL) {
34045     StringCat (str, " ");
34046     StringCat (str, partial);
34047   }
34048   if (location_type != NULL) {
34049     StringCat (str, " ");
34050     StringCat (str, location_type);
34051   }
34052 
34053   if (dist5 != NULL) {
34054     StringCat (str, " ");
34055     StringCat (str, dist5);
34056     dist5 = MemFree (dist5);
34057   }
34058   if (dist3 != NULL) {
34059     StringCat (str, " ");
34060     StringCat (str, dist3);
34061     dist3 = MemFree (dist3);
34062   }
34063 
34064   return str;
34065 }
34066 
34067 
SummarizeSourceConstraint(SourceConstraintPtr constraint)34068 static CharPtr SummarizeSourceConstraint (SourceConstraintPtr constraint)
34069 {
34070   CharPtr string, intro = NULL, field1, field2;
34071   CharPtr match_fmt = "%s %s matches %s";
34072   CharPtr present_fmt = "%s %s is present";
34073   CharPtr text_fmt = "%s text %s";
34074   CharPtr two_match_fmt = "%s %s matches %s and %s %s";
34075   CharPtr one_match_fmt = "%s %s %s";
34076   CharPtr summ = NULL;
34077 
34078   if (constraint == NULL) return NULL;
34079 
34080   string = SummarizeStringConstraint (constraint->constraint);
34081   field1 = SummarizeSourceQual (constraint->field1);
34082   field2 = SummarizeSourceQual (constraint->field2);
34083 
34084   if (constraint->field1 == NULL && constraint->field2 == NULL && string == NULL) {
34085     if (constraint->type_constraint == Object_type_constraint_feature) {
34086       summ = StringSave ("where source is a feature");
34087     } else if (constraint->type_constraint == Object_type_constraint_descriptor) {
34088       summ = StringSave ("where source is a descriptor");
34089     }
34090   } else {
34091     if (constraint->type_constraint == Object_type_constraint_any) {
34092       intro = "where source";
34093     } else if (constraint->type_constraint == Object_type_constraint_feature) {
34094       intro = "where source feature";
34095     } else if (constraint->type_constraint == Object_type_constraint_descriptor) {
34096       intro = "where source descriptor";
34097     } else {
34098       string = MemFree (string);
34099       field1 = MemFree (field1);
34100       field2 = MemFree (field2);
34101       return NULL;
34102     }
34103 
34104     if (string == NULL) {
34105       if (field1 == NULL && field2 == NULL) {
34106         if (constraint->type_constraint == Object_type_constraint_feature) {
34107           summ = StringSave ("where source is a feature");
34108         } else if (constraint->type_constraint == Object_type_constraint_descriptor) {
34109           summ = StringSave ("where source is a descriptor");
34110         }
34111       } else if (field1 != NULL && field2 != NULL) {
34112         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (field2)));
34113         sprintf (summ, match_fmt, intro, field1, field2);
34114       } else if (field1 != NULL) {
34115         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field1)));
34116         sprintf (summ, present_fmt, intro, field1);
34117       } else if (field2 != NULL) {
34118         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (intro) + StringLen (field2)));
34119         sprintf (summ, present_fmt, intro, field2);
34120       }
34121     } else {
34122       if (field1 == NULL && field2 == NULL) {
34123         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (intro) + StringLen (string)));
34124         sprintf (summ, text_fmt, intro, string);
34125       } else if (field1 != NULL && field2 != NULL) {
34126         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt) + StringLen (intro)
34127                                                    + 2 * StringLen (field1) + StringLen (field2) + StringLen (string)));
34128         sprintf (summ, two_match_fmt, intro, field1, field2, field1, string);
34129       } else if (field1 != NULL) {
34130         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field1) + StringLen (string)));
34131         sprintf (summ, one_match_fmt, intro, field1, string);
34132       } else if (field2 != NULL) {
34133         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (intro) + StringLen (field2) + StringLen (string)));
34134         sprintf (summ, one_match_fmt, intro, field2, string);
34135       }
34136     }
34137   }
34138   string = MemFree (string);
34139   field1 = MemFree (field1);
34140   field2 = MemFree (field2);
34141   return summ;
34142 }
34143 
34144 
34145 
SummarizeCDSGeneProtPseudoConstraint(CDSGeneProtPseudoConstraintPtr constraint)34146 static CharPtr SummarizeCDSGeneProtPseudoConstraint (CDSGeneProtPseudoConstraintPtr constraint)
34147 {
34148   CharPtr summ = NULL, pseudo_feat;
34149   CharPtr is_pseudo_fmt = "where %s is pseudo";
34150   CharPtr not_pseudo_fmt = "where %s is not pseudo";
34151 
34152   if (constraint != NULL) {
34153     pseudo_feat = CDSGeneProtFeatureNameFromFeatureType (constraint->feature);
34154     if (pseudo_feat != NULL) {
34155       if (constraint->is_pseudo) {
34156         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (is_pseudo_fmt) + StringLen (pseudo_feat)));
34157         sprintf (summ, is_pseudo_fmt, pseudo_feat);
34158       } else {
34159         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (not_pseudo_fmt) + StringLen (pseudo_feat)));
34160         sprintf (summ, not_pseudo_fmt, pseudo_feat);
34161       }
34162     }
34163   }
34164 
34165   return summ;
34166 }
34167 
34168 
SummarizeCDSGeneProtQualConstraint(CDSGeneProtQualConstraintPtr constraint)34169 static CharPtr SummarizeCDSGeneProtQualConstraint (CDSGeneProtQualConstraintPtr constraint)
34170 {
34171   CharPtr string, field1 = NULL, field2 = NULL;
34172   CharPtr match_fmt = "where %s matches %s";
34173   CharPtr present_fmt = "where %s is present";
34174   CharPtr text_fmt = "where CDS-gene-prot text %s";
34175   CharPtr two_match_fmt = "where %s matches %s and %s %s";
34176   CharPtr one_match_fmt = "where %s %s";
34177   CharPtr summ = NULL;
34178 
34179   if (constraint == NULL) return NULL;
34180 
34181   string = SummarizeStringConstraint (constraint->constraint);
34182   if (constraint->field1 != NULL && constraint->field1->choice == CDSGeneProtConstraintField_field) {
34183     field1 = CDSGeneProtNameFromField (constraint->field1->data.intvalue);
34184   }
34185   if (constraint->field2 != NULL && constraint->field2->choice == CDSGeneProtConstraintField_field) {
34186     field2 = CDSGeneProtNameFromField (constraint->field2->data.intvalue);
34187   }
34188 
34189   if (string == NULL) {
34190     if (field1 != NULL && field2 != NULL) {
34191       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (match_fmt) + StringLen (field1) + StringLen (field2)));
34192       sprintf (summ, match_fmt, field1, field2);
34193     } else if (field1 != NULL) {
34194       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field1)));
34195       sprintf (summ, present_fmt, field1);
34196     } else if (field2 != NULL) {
34197       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (present_fmt) + StringLen (field2)));
34198       sprintf (summ, present_fmt, field2);
34199     }
34200   } else {
34201     if (field1 == NULL && field2 == NULL) {
34202       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (text_fmt) + StringLen (string)));
34203       sprintf (summ, text_fmt, string);
34204     } else if (field1 != NULL && field2 != NULL) {
34205       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (two_match_fmt)
34206                                                   + 2 * StringLen (field1) + StringLen (field2) + StringLen (string)));
34207       sprintf (summ, two_match_fmt, field1, field2, field1, string);
34208     } else if (field1 != NULL) {
34209       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field1) + StringLen (string)));
34210       sprintf (summ, one_match_fmt, field1, string);
34211     } else if (field2 != NULL) {
34212       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (one_match_fmt) + StringLen (field2) + StringLen (string)));
34213       sprintf (summ, one_match_fmt, field2, string);
34214     }
34215   }
34216 
34217   string = MemFree (string);
34218   /* note - field1 and field2 aren't allocated, so we don't need to free them */
34219 
34220   return summ;
34221 }
34222 
34223 
34224 const CharPtr s_QuantityWords [] = { "exactly", "more than", "less than" };
34225 const Int4 k_NumQuantityWords = sizeof (s_QuantityWords) / sizeof (CharPtr);
34226 
/* Builds a phrase such as "sequence has more than 2 CDS features" from a
 * quantity constraint.
 *
 * v            - quantity node; v->choice selects the comparison word
 *                (1-based index into s_QuantityWords) and v->data.intvalue
 *                is the count.
 * feature_name - optional feature name placed before "feature"; omitted
 *                when it has no text.
 *
 * Returns NULL when v is NULL or v->choice is out of range; otherwise
 * returns a buffer obtained from MemNew.
 */
static CharPtr SummarizeFeatureQuantity (ValNodePtr v, CharPtr feature_name)
{
  CharPtr fmt = "sequence has %s %d %s%sfeature%s";
  CharPtr quantity_word;
  CharPtr name_part, name_sep, plural;
  CharPtr result;
  Boolean has_name;
  Int4    size;

  if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) {
    return NULL;
  }

  quantity_word = s_QuantityWords[v->choice - 1];

  has_name = (Boolean) !StringHasNoText (feature_name);
  name_part = has_name ? feature_name : "";
  name_sep = has_name ? " " : "";

  /* "feature" stays singular only for a count of exactly one */
  plural = (v->data.intvalue == 1) ? "" : "s";

  /* 15 extra characters leave room for the printed count */
  size = StringLen (fmt) + StringLen (quantity_word) + 15 + StringLen (name_part);

  result = (CharPtr) MemNew (sizeof (Char) * size);
  sprintf (result, fmt, quantity_word, v->data.intvalue, name_part, name_sep, plural);

  return result;
}
34252 
34253 
SummarizeSequenceLength(ValNodePtr v)34254 static CharPtr SummarizeSequenceLength (ValNodePtr v)
34255 {
34256   CharPtr fmt = "sequence is %s %d in length";
34257   CharPtr summ = NULL;
34258 
34259   if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) {
34260     return NULL;
34261   }
34262 
34263   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15));
34264   sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue);
34265   return summ;
34266 }
34267 
34268 
/* Phrases describing feature-strandedness sequence constraints.
 * SummarizeFeatureStrandedness indexes this table directly with the
 * strandedness value, so the order of the entries matters.
 */
static CharPtr s_SequenceConstraintStrandedness[] = {
  "Any",
  "sequence contains only minus strand features",
  "sequence contains only plus strand features",
  "sequence contains at least one minus strand feature",
  "sequence contains at least one plus strand feature",
  "sequence contains no minus strand features",
  "sequence contains no plus strand features"
};
34278 
34279 
SummarizeFeatureStrandedness(Uint2 strandedness)34280 NLM_EXTERN CharPtr SummarizeFeatureStrandedness (Uint2 strandedness)
34281 {
34282   if (strandedness < sizeof (s_SequenceConstraintStrandedness) / sizeof (CharPtr)) {
34283     return s_SequenceConstraintStrandedness[strandedness];
34284   } else {
34285     return NULL;
34286   }
34287 }
34288 
34289 
SummarizeSequenceConstraint(SequenceConstraintPtr constraint)34290 static CharPtr SummarizeSequenceConstraint (SequenceConstraintPtr constraint)
34291 {
34292   CharPtr summ = NULL;
34293   CharPtr seq_word = NULL, featpresent = NULL, id = NULL;
34294   Int4    len = 0;
34295   CharPtr seq_word_intro = "where sequence type is ";
34296   CharPtr feat_after = " is present";
34297   CharPtr id_intro = "sequence ID ";
34298   CharPtr feat_type_quantity = NULL;
34299   CharPtr feat_quantity = NULL;
34300   CharPtr length_quantity = NULL;
34301   CharPtr strandedness = NULL;
34302 
34303   if (IsSequenceConstraintEmpty (constraint)) {
34304     summ = StringSave ("Missing sequence constraint");
34305   } else {
34306     if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) {
34307       switch (constraint->seqtype->choice) {
34308         case SequenceConstraintMolTypeConstraint_nucleotide:
34309           seq_word = "nucleotide";
34310           break;
34311         case SequenceConstraintMolTypeConstraint_dna:
34312           seq_word = "DNA";
34313           break;
34314         case SequenceConstraintMolTypeConstraint_rna:
34315           if (constraint->seqtype->data.intvalue == Sequence_constraint_rnamol_any) {
34316             seq_word = "RNA";
34317           } else {
34318             seq_word = GetBiomolNameForRnaType (constraint->seqtype->data.intvalue);
34319           }
34320           break;
34321         case SequenceConstraintMolTypeConstraint_protein:
34322           seq_word = "protein";
34323           break;
34324       }
34325     }
34326 
34327     if (constraint->feature != Macro_feature_type_any) {
34328       featpresent = GetFeatureNameFromFeatureType (constraint->feature);
34329       if (constraint->num_type_features != NULL) {
34330         feat_type_quantity = SummarizeFeatureQuantity (constraint->num_type_features, featpresent);
34331         featpresent = NULL;
34332       }
34333     }
34334 
34335     if (!IsStringConstraintEmpty (constraint->id)) {
34336       id = SummarizeStringConstraint (constraint->id);
34337     }
34338 
34339     if (seq_word != NULL) {
34340       len += StringLen (seq_word) + StringLen (seq_word_intro);
34341     }
34342 
34343     if (featpresent != NULL) {
34344       if (len == 0) {
34345         len += 6;
34346       } else {
34347         len += 5;
34348       }
34349       len += StringLen (featpresent);
34350       len += StringLen (feat_after);
34351     }
34352 
34353     if (feat_type_quantity != NULL) {
34354       if (len == 0) {
34355         len += 6;
34356       } else {
34357         len += 5;
34358       }
34359       len += StringLen (feat_type_quantity);
34360     }
34361 
34362     if (id != NULL) {
34363       if (len == 0) {
34364         len += 6;
34365       } else {
34366         len += 5;
34367       }
34368       len += StringLen (id_intro);
34369       len += StringLen (id);
34370     }
34371 
34372     feat_quantity = SummarizeFeatureQuantity (constraint->num_features, NULL);
34373     if (feat_quantity != NULL) {
34374       len += StringLen (feat_quantity) + 6;
34375     }
34376 
34377     length_quantity = SummarizeSequenceLength (constraint->length);
34378     if (length_quantity != NULL) {
34379       len += StringLen (length_quantity) + 6;
34380     }
34381 
34382     if (constraint->strandedness > Feature_strandedness_constraint_any) {
34383       strandedness = SummarizeFeatureStrandedness(constraint->strandedness);
34384       len += StringLen (strandedness) + 6;
34385     }
34386 
34387     if (len == 0) {
34388       summ = StringSave ("missing sequence constraint");
34389     } else {
34390       len++;
34391       summ = (CharPtr) MemNew (sizeof (Char) * len);
34392       summ[0] = 0;
34393       if (seq_word != NULL) {
34394         StringCat (summ, seq_word_intro);
34395         StringCat (summ, seq_word);
34396       }
34397       if (featpresent != NULL) {
34398         if (seq_word == NULL) {
34399           StringCat (summ, "where ");
34400         } else {
34401           StringCat (summ, " and ");
34402         }
34403         StringCat (summ, featpresent);
34404         StringCat (summ, feat_after);
34405       }
34406       if (feat_type_quantity != NULL) {
34407         if (summ[0] == 0) {
34408           StringCat (summ, "where ");
34409         } else {
34410           StringCat (summ, " and ");
34411         }
34412         StringCat (summ, feat_type_quantity);
34413       }
34414       if (id != NULL) {
34415         if (seq_word == NULL && featpresent == NULL) {
34416           StringCat (summ, "where ");
34417         } else {
34418           StringCat (summ, " and ");
34419         }
34420         StringCat (summ, id_intro);
34421         StringCat (summ, id);
34422       }
34423       if (feat_quantity != NULL) {
34424         if (StringHasNoText (summ)) {
34425           StringCat (summ, "where ");
34426         } else {
34427           StringCat (summ, " and ");
34428         }
34429         StringCat (summ, feat_quantity);
34430       }
34431       if (length_quantity != NULL) {
34432         if (StringHasNoText (summ)) {
34433           StringCat (summ, "where ");
34434         } else {
34435           StringCat (summ, " and ");
34436         }
34437         StringCat (summ, length_quantity);
34438       }
34439       if (strandedness != NULL) {
34440         if (StringHasNoText (summ)) {
34441           StringCat (summ, "where ");
34442         } else {
34443           StringCat (summ, " and ");
34444         }
34445         StringCat (summ, strandedness);
34446       }
34447     }
34448     id = MemFree (id);
34449     feat_type_quantity = MemFree (feat_type_quantity);
34450     feat_quantity = MemFree (feat_quantity);
34451     length_quantity = MemFree (length_quantity);
34452   }
34453   return summ;
34454 }
34455 
34456 
34457 const CharPtr s_SpecialPubFieldWords [] = { "is present", "is not present", "is all caps", "is all lowercase", "is all punctuation" };
34458 const Int4 k_NumSpecialPubFieldWords = sizeof (s_SpecialPubFieldWords) / sizeof (CharPtr);
34459 
SummarizePubFieldSpecialConstraint(PubFieldSpecialConstraintPtr field)34460 static CharPtr SummarizePubFieldSpecialConstraint (PubFieldSpecialConstraintPtr field)
34461 {
34462   CharPtr fmt = "%s %s";
34463   CharPtr label, summ = NULL;
34464 
34465   if (field == NULL || field->constraint == NULL
34466       || field->constraint->choice < 1
34467       || field->constraint->choice > k_NumSpecialPubFieldWords) {
34468     return NULL;
34469   }
34470 
34471   label = GetPubFieldLabel (field->field);
34472 
34473   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label)
34474                                             + StringLen (s_SpecialPubFieldWords[field->constraint->choice - 1])));
34475   sprintf (summ, fmt, label, s_SpecialPubFieldWords[field->constraint->choice - 1]);
34476   return summ;
34477 }
34478 
34479 
SummarizePubFieldConstraint(PubFieldConstraintPtr field)34480 static CharPtr SummarizePubFieldConstraint (PubFieldConstraintPtr field)
34481 {
34482   CharPtr fmt = "%s %s", summ = NULL;
34483   CharPtr string, label;
34484 
34485   if (field == NULL || field->constraint == NULL) {
34486     return NULL;
34487   }
34488 
34489   string = SummarizeStringConstraint (field->constraint);
34490   label = GetPubFieldLabel (field->field);
34491 
34492   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string)));
34493   sprintf (summ, fmt, label, string);
34494   string = MemFree (string);
34495   return summ;
34496 }
34497 
34498 
SummarizePublicationConstraint(PublicationConstraintPtr constraint)34499 static CharPtr SummarizePublicationConstraint (PublicationConstraintPtr constraint)
34500 {
34501   CharPtr type = NULL, field = NULL, special = NULL, summ = NULL;
34502   Boolean first = TRUE;
34503   Int4 len;
34504 
34505   if (IsPublicationConstraintEmpty (constraint)) return NULL;
34506 
34507   switch (constraint->type) {
34508     case Pub_type_published:
34509       type = "pub is published";
34510       break;
34511     case Pub_type_unpublished:
34512       type = "pub is unpublished";
34513       break;
34514     case Pub_type_in_press:
34515       type = "pub is in press";
34516       break;
34517     case Pub_type_submitter_block:
34518       type = "pub is submitter block";
34519       break;
34520   }
34521 
34522   field = SummarizePubFieldConstraint (constraint->field);
34523   special = SummarizePubFieldSpecialConstraint (constraint->special_field);
34524 
34525   if (type == NULL && field == NULL && special == NULL) {
34526     return NULL;
34527   }
34528 
34529   len = 17 + StringLen (type) + StringLen (field) + StringLen (special);
34530   summ = (CharPtr) MemNew (sizeof (Char) * len);
34531   sprintf (summ, "where ");
34532   if (type != NULL) {
34533     StringCat (summ, type);
34534     first = FALSE;
34535   }
34536   if (field != NULL) {
34537     if (!first) {
34538       StringCat (summ, " and ");
34539     }
34540     StringCat (summ, field);
34541     first = FALSE;
34542   }
34543 
34544   if (special != NULL) {
34545     if (!first) {
34546       StringCat (summ, " and ");
34547     }
34548     StringCat (summ, special);
34549     first = FALSE;
34550   }
34551 
34552   field = MemFree (field);
34553   special = MemFree (special);
34554 
34555   return summ;
34556 }
34557 
34558 
SummarizeFieldConstraint(FieldConstraintPtr constraint)34559 static CharPtr SummarizeFieldConstraint (FieldConstraintPtr constraint)
34560 {
34561   CharPtr rval = NULL;
34562   CharPtr string = NULL, label = NULL;
34563   CharPtr fmt = "where %s %s";
34564 
34565   if (IsFieldConstraintEmpty (constraint)) return NULL;
34566 
34567   string = SummarizeStringConstraint (constraint->string_constraint);
34568   label = SummarizeFieldType (constraint->field);
34569 
34570   if (string != NULL && label != NULL) {
34571     rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (string)));
34572     sprintf (rval, fmt, label, string);
34573   }
34574   string = MemFree (string);
34575   label = MemFree (label);
34576 
34577   return rval;
34578 }
34579 
34580 
SummarizeMissingFieldConstraint(FieldTypePtr field)34581 static CharPtr SummarizeMissingFieldConstraint (FieldTypePtr field)
34582 {
34583   CharPtr rval = NULL;
34584   CharPtr label = NULL;
34585   CharPtr fmt = "where %s is missing";
34586 
34587   if (field == NULL) return NULL;
34588 
34589   label = SummarizeFieldType (field);
34590 
34591   if (label != NULL) {
34592     rval = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label)));
34593     sprintf (rval, fmt, label);
34594   }
34595   label = MemFree (label);
34596 
34597   return rval;
34598 }
34599 
34600 
SummarizeMolinfoFieldConstraint(MolinfoFieldConstraintPtr constraint)34601 static CharPtr SummarizeMolinfoFieldConstraint (MolinfoFieldConstraintPtr constraint)
34602 {
34603   CharPtr label, cp;
34604   CharPtr fmt = "where %s is%s %s";
34605   CharPtr rval = NULL;
34606   Int4    len, offset;
34607 
34608   if (IsMolinfoFieldConstraintEmpty(constraint)) {
34609     return NULL;
34610   }
34611   label = GetSequenceQualName (constraint->field);
34612   if (label == NULL) {
34613     return NULL;
34614   }
34615   cp = StringChr (label, ' ');
34616   if (cp == NULL) {
34617     return NULL;
34618   }
34619   offset = cp - label;
34620   len = StringLen (fmt) + StringLen (label);
34621   if (constraint->is_not) {
34622     len += 4;
34623   }
34624   rval = (CharPtr) MemNew (sizeof (Char) * len);
34625   sprintf (rval, "where %s", label);
34626   StringCpy (rval + 7 + offset, constraint->is_not ? "is not " : "is ");
34627   StringCat (rval, cp + 1);
34628 
34629   return rval;
34630 }
34631 
34632 
IsTranslationConstraintEmpty(TranslationConstraintPtr constraint)34633 NLM_EXTERN Boolean IsTranslationConstraintEmpty (TranslationConstraintPtr constraint)
34634 {
34635   if (constraint == NULL) {
34636     return TRUE;
34637   } else if (constraint->num_mismatches != NULL) {
34638     return FALSE;
34639   } else if (constraint->internal_stops != Match_type_constraint_dont_care) {
34640     return FALSE;
34641   } else if (!IsStringConstraintEmpty (constraint->actual_strings)) {
34642     return FALSE;
34643   } else if (!IsStringConstraintEmpty (constraint->transl_strings)) {
34644     return FALSE;
34645   } else {
34646     return TRUE;
34647   }
34648 }
34649 
34650 
SummarizeTranslationMismatches(ValNodePtr v)34651 static CharPtr SummarizeTranslationMismatches (ValNodePtr v)
34652 {
34653   CharPtr fmt = "there are %s %d mismatches between the actual and translated protein sequences";
34654   CharPtr summ = NULL;
34655 
34656   if (v == NULL || v->choice < 1 || v->choice > k_NumQuantityWords) {
34657     return NULL;
34658   }
34659 
34660   summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (s_QuantityWords[v->choice - 1]) + 15));
34661   sprintf (summ, fmt, s_QuantityWords[v->choice - 1], v->data.intvalue);
34662   return summ;
34663 }
34664 
34665 
SummarizeTranslationConstraint(TranslationConstraintPtr constraint)34666 static CharPtr SummarizeTranslationConstraint (TranslationConstraintPtr constraint)
34667 {
34668   CharPtr rval = NULL;
34669   CharPtr mismatch = NULL;
34670   CharPtr tmp;
34671   CharPtr where_actual_sequence = "where actual sequence ";
34672   CharPtr where_transl_sequence = "where translated sequence ";
34673   CharPtr has_internal_stops = "sequence has internal stops";
34674   CharPtr no_internal_stops = "sequence has no internal stops";
34675   Int4    len = 0;
34676   StringConstraintPtr scp;
34677   ValNodePtr actual_phrases = NULL, transl_phrases = NULL, vnp;
34678   Int4 num_phrases = 0, phrase_num = 1;
34679 
34680   if (IsTranslationConstraintEmpty(constraint)) {
34681     return NULL;
34682   }
34683 
34684   if (constraint->actual_strings != NULL) {
34685     len += StringLen (where_actual_sequence);
34686     for (scp = constraint->actual_strings; scp != NULL; scp = scp->next) {
34687       tmp = SummarizeStringConstraint (scp);
34688       if (tmp != NULL) {
34689         len += StringLen (tmp) + 2;
34690         ValNodeAddPointer (&actual_phrases, 0, tmp);
34691       }
34692     }
34693     len += 5;
34694     num_phrases ++;
34695   }
34696   if (constraint->transl_strings != NULL) {
34697     len += StringLen (where_transl_sequence);
34698     for (scp = constraint->transl_strings; scp != NULL; scp = scp->next) {
34699       tmp = SummarizeStringConstraint (scp);
34700       if (tmp != NULL) {
34701         len += StringLen (tmp) + 2;
34702         ValNodeAddPointer (&transl_phrases, 0, tmp);
34703       }
34704     }
34705     len += 5;
34706     num_phrases ++;
34707   }
34708 
34709   if (constraint->num_mismatches != NULL) {
34710     mismatch = SummarizeTranslationMismatches(constraint->num_mismatches);
34711     len += StringLen (mismatch) + 5;
34712     num_phrases ++;
34713   }
34714 
34715   if (constraint->internal_stops == Match_type_constraint_yes) {
34716     len += StringLen (has_internal_stops) + 5;
34717     num_phrases ++;
34718   } else if (constraint->internal_stops == Match_type_constraint_no) {
34719     len += StringLen (no_internal_stops) + 5;
34720     num_phrases ++;
34721   }
34722 
34723   rval = (CharPtr) MemNew (sizeof (Char) * len);
34724   rval[0] = 0;
34725   if (actual_phrases != NULL) {
34726     StringCat (rval, where_actual_sequence);
34727     for (vnp = actual_phrases; vnp != NULL; vnp = vnp->next) {
34728       StringCat (rval, vnp->data.ptrvalue);
34729       if (vnp->next != NULL) {
34730         StringCat (rval, ", ");
34731       }
34732     }
34733     actual_phrases = ValNodeFreeData (actual_phrases);
34734     phrase_num++;
34735   }
34736 
34737   if (transl_phrases != NULL) {
34738     if (phrase_num > 1) {
34739       if (num_phrases > 2) {
34740         StringCat (rval, ", ");
34741       }
34742       if (phrase_num == num_phrases) {
34743         StringCat (rval, " and ");
34744       }
34745     }
34746     StringCat (rval, where_transl_sequence);
34747     for (vnp = transl_phrases; vnp != NULL; vnp = vnp->next) {
34748       StringCat (rval, vnp->data.ptrvalue);
34749       if (vnp->next != NULL) {
34750         StringCat (rval, ", ");
34751       }
34752     }
34753     transl_phrases = ValNodeFreeData (transl_phrases);
34754     phrase_num++;
34755   }
34756 
34757   if (mismatch != NULL) {
34758     if (phrase_num > 1) {
34759       if (num_phrases > 2) {
34760         StringCat (rval, ", ");
34761       }
34762       if (phrase_num == num_phrases) {
34763         StringCat (rval, " and ");
34764       }
34765     }
34766     StringCat (rval, mismatch);
34767     mismatch = MemFree (mismatch);
34768     phrase_num++;
34769   }
34770 
34771   if (constraint->internal_stops == Match_type_constraint_yes) {
34772     if (phrase_num > 1) {
34773       if (num_phrases > 2) {
34774         StringCat (rval, ", ");
34775       }
34776       if (phrase_num == num_phrases) {
34777         StringCat (rval, " and ");
34778       }
34779     }
34780     StringCat (rval, has_internal_stops);
34781     phrase_num++;
34782   } else if (constraint->internal_stops == Match_type_constraint_yes) {
34783     len += StringLen (no_internal_stops) + 5;
34784     if (phrase_num > 1) {
34785       if (num_phrases > 2) {
34786         StringCat (rval, ", ");
34787       }
34788       if (phrase_num == num_phrases) {
34789         StringCat (rval, " and ");
34790       }
34791     }
34792     StringCat (rval, no_internal_stops);
34793     phrase_num++;
34794   }
34795 
34796   return rval;
34797 }
34798 
34799 
SummarizeConstraint(ValNodePtr constraint)34800 NLM_EXTERN CharPtr SummarizeConstraint (ValNodePtr constraint)
34801 {
34802   CharPtr phrase = NULL, tmp;
34803   CharPtr fmt = "where object text %s";
34804 
34805   if (constraint == NULL) return NULL;
34806   switch (constraint->choice) {
34807     case ConstraintChoice_string:
34808       tmp = SummarizeStringConstraint (constraint->data.ptrvalue);
34809       if (tmp != NULL) {
34810         phrase = (CharPtr) MemNew (sizeof (Char) * (StringLen (tmp) + StringLen (fmt)));
34811         sprintf (phrase, fmt, tmp);
34812         tmp = MemFree (tmp);
34813       }
34814       break;
34815     case ConstraintChoice_location:
34816       phrase = SummarizeLocationConstraint (constraint->data.ptrvalue);
34817       break;
34818     case ConstraintChoice_source:
34819       phrase = SummarizeSourceConstraint (constraint->data.ptrvalue);
34820       break;
34821     case ConstraintChoice_cdsgeneprot_qual:
34822       phrase = SummarizeCDSGeneProtQualConstraint (constraint->data.ptrvalue);
34823       break;
34824     case ConstraintChoice_cdsgeneprot_pseudo:
34825       phrase = SummarizeCDSGeneProtPseudoConstraint (constraint->data.ptrvalue);
34826       break;
34827     case ConstraintChoice_sequence:
34828       phrase = SummarizeSequenceConstraint (constraint->data.ptrvalue);
34829       break;
34830     case ConstraintChoice_pub:
34831       phrase = SummarizePublicationConstraint (constraint->data.ptrvalue);
34832       break;
34833     case ConstraintChoice_field:
34834       phrase = SummarizeFieldConstraint (constraint->data.ptrvalue);
34835       break;
34836     case ConstraintChoice_molinfo:
34837       phrase = SummarizeMolinfoFieldConstraint (constraint->data.ptrvalue);
34838       break;
34839     case ConstraintChoice_field_missing:
34840       phrase = SummarizeMissingFieldConstraint (constraint->data.ptrvalue);
34841       break;
34842     case ConstraintChoice_translation:
34843       phrase = SummarizeTranslationConstraint (constraint->data.ptrvalue);
34844       break;
34845   }
34846   return phrase;
34847 }
34848 
34849 
SummarizeConstraintSet(ValNodePtr constraint_set)34850 NLM_EXTERN CharPtr SummarizeConstraintSet (ValNodePtr constraint_set)
34851 {
34852   ValNodePtr phrases = NULL, vnp;
34853   Int4 len = 0;
34854   CharPtr phrase, str = NULL;
34855 
34856   while (constraint_set != NULL) {
34857     phrase = SummarizeConstraint (constraint_set);
34858     if (phrase != NULL) {
34859       ValNodeAddPointer (&phrases, 0, phrase);
34860       if (len > 0) {
34861         len += 5; /* for " and " */
34862       } else {
34863         len += 1; /* for terminal NULL */
34864       }
34865       len += StringLen (phrase);
34866     }
34867     constraint_set = constraint_set->next;
34868   }
34869   if (len > 0) {
34870     str = (CharPtr) MemNew (sizeof (Char) * len);
34871     for (vnp = phrases; vnp != NULL; vnp = vnp->next) {
34872       StringCat (str, vnp->data.ptrvalue);
34873       if (vnp->next != NULL) {
34874         StringCat (str, " and ");
34875       }
34876     }
34877   }
34878   return str;
34879 }
34880 
34881 
34882 /* for table readers that use the macro language functions */
34883 
34884 /* MatchType is used to represent how the column should be matched.
34885  */
34886 
MatchTypeNew()34887 NLM_EXTERN MatchTypePtr MatchTypeNew ()
34888 {
34889   MatchTypePtr match_type = MemNew (sizeof (MatchTypeData));
34890   match_type->data = NULL;
34891   match_type->match_location = String_location_equals;
34892   match_type->choice = eTableMatchNucID;
34893   return match_type;
34894 }
34895 
34896 
MatchTypeFree(MatchTypePtr match_type)34897 NLM_EXTERN MatchTypePtr MatchTypeFree (MatchTypePtr match_type)
34898 {
34899   if (match_type != NULL) {
34900     if (match_type->choice == eTableMatchSourceQual) {
34901       match_type->data = SourceQualChoiceFree (match_type->data);
34902     }
34903     match_type = MemFree (match_type);
34904   }
34905   return match_type;
34906 }
34907 
34908 
/* Returns a new MatchType with the same choice and match location as orig.
 * The data member is duplicated (through an ASN.1 memory copy) only for
 * eTableMatchSourceQual; for other choices it keeps the value set by
 * MatchTypeNew.  Returns NULL when orig is NULL.
 */
static MatchTypePtr MatchTypeCopy (MatchTypePtr orig)
{
  MatchTypePtr dup;

  if (orig == NULL) {
    return NULL;
  }

  dup = MatchTypeNew();
  dup->choice = orig->choice;
  dup->match_location = orig->match_location;

  if (dup->choice == eTableMatchSourceQual) {
    dup->data = AsnIoMemCopy (orig->data,
                              (AsnReadFunc) SourceQualChoiceAsnRead,
                              (AsnWriteFunc) SourceQualChoiceAsnWrite);
  }

  return dup;
}
34923 
34924 
FindMatchTypeInHeader(ValNodePtr columns)34925 static MatchTypePtr FindMatchTypeInHeader (ValNodePtr columns)
34926 {
34927   ValNodePtr col_vnp;
34928   MatchTypePtr match_type = NULL;
34929   TabColumnConfigPtr t;
34930 
34931   for (col_vnp = columns;
34932         col_vnp != NULL && match_type == NULL;
34933         col_vnp = col_vnp->next) {
34934     t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
34935     if (t != NULL && t->match_type != NULL) {
34936       match_type = MatchTypeCopy (t->match_type);
34937     }
34938   }
34939   return match_type;
34940 }
34941 
34942 
TabColumnConfigNew(void)34943 NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void)
34944 {
34945   TabColumnConfigPtr t;
34946 
34947   t = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData));
34948   t->match_type = NULL;
34949   t->field = NULL;
34950   t->existing_text = ExistingTextOption_replace_old;
34951   t->constraint = NULL;
34952   t->skip_blank = TRUE;
34953   return t;
34954 }
34955 
34956 
TabColumnConfigFree(TabColumnConfigPtr t)34957 NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t)
34958 {
34959   if (t != NULL) {
34960     t->field = FieldTypeFree (t->field);
34961     t->match_type = MatchTypeFree (t->match_type);
34962     t->constraint = ConstraintChoiceSetFree (t->constraint);
34963     t = MemFree (t);
34964   }
34965   return t;
34966 }
34967 
34968 
TabColumnConfigCopy(TabColumnConfigPtr orig)34969 NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig)
34970 {
34971   TabColumnConfigPtr t = NULL;
34972 
34973   if (orig != NULL) {
34974     t = TabColumnConfigNew ();
34975 
34976     t->match_type = MatchTypeCopy (orig->match_type);
34977     t->existing_text = orig->existing_text;
34978     t->skip_blank = orig->skip_blank;
34979     t->match_mrna = orig->match_mrna;
34980     t->field = FieldTypeCopy (orig->field);
34981     t->constraint = AsnIoMemCopy (orig->constraint, (AsnReadFunc) ConstraintChoiceSetAsnRead, (AsnWriteFunc) ConstraintChoiceSetAsnWrite);
34982   }
34983   return t;
34984 }
34985 
34986 
TabColumnConfigReset(TabColumnConfigPtr t)34987 NLM_EXTERN void TabColumnConfigReset (TabColumnConfigPtr t)
34988 {
34989   if (t != NULL) {
34990     t->match_type = MatchTypeFree (t->match_type);
34991     t->field = FieldTypeFree (t->field);
34992     t->constraint = ConstraintChoiceSetFree (t->constraint);
34993     t->existing_text = ExistingTextOption_replace_old;
34994     t->skip_blank = TRUE;
34995     t->match_mrna = FALSE;
34996   }
34997 }
34998 
34999 
TabColumnConfigListFree(ValNodePtr columns)35000 NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns)
35001 {
35002   ValNodePtr vnp_next;
35003 
35004   while (columns != NULL) {
35005     vnp_next = columns->next;
35006     columns->data.ptrvalue = TabColumnConfigFree (columns->data.ptrvalue);
35007     columns->next = NULL;
35008     columns = ValNodeFree (columns);
35009     columns = vnp_next;
35010   }
35011   return columns;
35012 }
35013 
35014 
TabColumnConfigListCopy(ValNodePtr orig)35015 NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig)
35016 {
35017   ValNodePtr new_list = NULL;
35018   TabColumnConfigPtr t;
35019 
35020   while (orig != NULL) {
35021     t = TabColumnConfigCopy (orig->data.ptrvalue);
35022     ValNodeAddPointer (&new_list, 0, t);
35023     orig = orig->next;
35024   }
35025   return new_list;
35026 }
35027 
35028 
MatchTypeFromTableMatchType(TableMatchPtr t)35029 NLM_EXTERN MatchTypePtr MatchTypeFromTableMatchType (TableMatchPtr t)
35030 {
35031   MatchTypePtr m;
35032 
35033   if (t == NULL) {
35034     return NULL;
35035   }
35036   m = MatchTypeNew ();
35037   m->match_location = t->match_location;
35038   if (t->match_type != NULL) {
35039     switch (t->match_type->choice) {
35040       case TableMatchType_feature_id:
35041         m->choice = eTableMatchFeatureID;
35042         break;
35043       case TableMatchType_gene_locus_tag:
35044         m->choice = eTableMatchGeneLocusTag;
35045         break;
35046       case TableMatchType_protein_id:
35047         m->choice = eTableMatchProteinID;
35048         break;
35049       case TableMatchType_dbxref:
35050         m->choice = eTableMatchDbxref;
35051         break;
35052       case TableMatchType_nuc_id:
35053         m->choice = eTableMatchNucID;
35054         break;
35055       case TableMatchType_src_qual:
35056         m->choice = eTableMatchSourceQual;
35057         m->data = AsnIoMemCopy (t->match_type->data.ptrvalue,
35058                                 (AsnReadFunc)SourceQualChoiceAsnRead,
35059                                 (AsnWriteFunc)SourceQualChoiceAsnWrite);
35060         break;
35061       case TableMatchType_protein_name:
35062         m->choice = eTableMatchProteinName;
35063         break;
35064       case TableMatchType_bioproject:
35065         m->choice = eTableMatchBioProject;
35066         break;
35067       case TableMatchType_any:
35068         m->choice = eTableMatchAny;
35069         break;
35070     }
35071   }
35072   return m;
35073 }
35074 
35075 
TableMatchTypeFromMatchType(MatchTypePtr m)35076 NLM_EXTERN TableMatchPtr TableMatchTypeFromMatchType (MatchTypePtr m)
35077 {
35078   TableMatchPtr t;
35079   ValNodePtr    s;
35080 
35081   if (m == NULL) {
35082     return NULL;
35083   }
35084   t = TableMatchNew ();
35085   t->match_location = m->match_location;
35086   t->match_type = ValNodeNew (NULL);
35087   switch (m->choice) {
35088     case eTableMatchFeatureID:
35089       t->match_type->choice = TableMatchType_feature_id;
35090       break;
35091     case eTableMatchGeneLocusTag:
35092       t->match_type->choice = TableMatchType_gene_locus_tag;
35093       break;
35094     case eTableMatchProteinID:
35095       t->match_type->choice = TableMatchType_protein_id;
35096       break;
35097     case eTableMatchDbxref:
35098       t->match_type->choice = TableMatchType_dbxref;
35099       break;
35100     case eTableMatchNucID:
35101       t->match_type->choice = TableMatchType_nuc_id;
35102       break;
35103     case eTableMatchSourceQual:
35104       t->match_type->choice = TableMatchType_src_qual;
35105       t->match_type->data.ptrvalue = AsnIoMemCopy (m->data,
35106                                 (AsnReadFunc)SourceQualChoiceAsnRead,
35107                                 (AsnWriteFunc)SourceQualChoiceAsnWrite);
35108       break;
35109     case eTableMatchBioSource:
35110       t->match_type->choice = TableMatchType_src_qual;
35111       s = ValNodeNew (NULL);
35112       s->choice = SourceQualChoice_textqual;
35113       s->data.intvalue = Source_qual_taxname;
35114       t->match_type->data.ptrvalue = s;
35115       break;
35116     case eTableMatchProteinName:
35117       t->match_type->choice = TableMatchType_protein_name;
35118       break;
35119     case eTableMatchBioProject:
35120       t->match_type->choice = TableMatchType_bioproject;
35121       break;
35122     case eTableMatchAny:
35123       t->match_type->choice = TableMatchType_any;
35124       break;
35125   }
35126   return t;
35127 }
35128 
35129 
CheckForDuplicateColumns(ValNodePtr columns)35130 NLM_EXTERN ValNodePtr CheckForDuplicateColumns (ValNodePtr columns)
35131 {
35132   ValNodePtr vnp, vnp2;
35133   TabColumnConfigPtr t, t2;
35134   ValNodePtr duplicate_column_list = NULL;
35135   Boolean    this_dup;
35136 
35137   for (vnp = columns; vnp != NULL; vnp = vnp->next) {
35138     this_dup = FALSE;
35139     t = (TabColumnConfigPtr) vnp->data.ptrvalue;
35140     if (t != NULL && t->field != NULL) {
35141       for (vnp2 = vnp->next; vnp2 != NULL && !this_dup; vnp2 = vnp2->next) {
35142         t2 = (TabColumnConfigPtr) vnp2->data.ptrvalue;
35143         if (t2 != NULL && CompareFieldTypes(t->field, t2->field) == 0 && t2->existing_text != ExistingTextOption_add_qual) {
35144           ValNodeAddPointer (&duplicate_column_list, 0, SummarizeFieldType (t->field));
35145           this_dup = TRUE;
35146         }
35147       }
35148     }
35149   }
35150   duplicate_column_list = ValNodeSort (duplicate_column_list, SortVnpByString);
35151   ValNodeUnique (&duplicate_column_list, SortVnpByString, ValNodeFreeData);
35152   return duplicate_column_list;
35153 }
35154 
35155 
FixDuplicateColumns(ValNodePtr columns)35156 NLM_EXTERN void FixDuplicateColumns (ValNodePtr columns)
35157 {
35158   ValNodePtr vnp, vnp2;
35159   TabColumnConfigPtr t, t2;
35160 
35161   if (columns == NULL || columns->next == NULL) {
35162     return;
35163   }
35164 
35165   for (vnp = columns; vnp != NULL; vnp = vnp->next) {
35166     t = (TabColumnConfigPtr) vnp->data.ptrvalue;
35167     if (t->field != NULL) {
35168       for (vnp2 = vnp->next; vnp2 != NULL; vnp2 = vnp2->next) {
35169         t2 = (TabColumnConfigPtr) vnp2->data.ptrvalue;
35170         if (CompareFieldTypes(t->field, t2->field) == 0 && t2->existing_text != ExistingTextOption_add_qual) {
35171           t2->existing_text = ExistingTextOption_add_qual;
35172         }
35173       }
35174     }
35175   }
35176 }
35177 
35178 
35179 /* This checks the column names and returns a list of the feature fields */
ValidateFeatureFieldColumnNames(ValNodePtr header_line,ValNodePtr PNTR perr_list)35180 NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list)
35181 {
35182   ValNodePtr         header_vnp;
35183   ValNodePtr         err_list = NULL, col_list = NULL;
35184   Boolean            rval = TRUE;
35185   TabColumnConfigPtr t;
35186   FeatureFieldPtr    field;
35187   Int4               featqual, feat_type;
35188   CharPtr            first_space;
35189 
35190   if (header_line == NULL)
35191   {
35192     return FALSE;
35193   }
35194 
35195   header_vnp = header_line->data.ptrvalue;
35196   if (header_vnp == NULL || header_vnp->next == NULL)
35197   {
35198     return FALSE;
35199   }
35200 
35201   /* skip ID column */
35202   header_vnp = header_vnp->next;
35203   while (header_vnp != NULL && rval)
35204   {
35205     first_space = StringChr (header_vnp->data.ptrvalue, ' ');
35206     if (first_space != NULL) {
35207       *first_space = 0;
35208       feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue);
35209       featqual = GetFeatQualByName (first_space + 1);
35210       *first_space = ' ';
35211       if (feat_type < 0 || featqual < 0) {
35212         /* unable to recognize column name */
35213         ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue));
35214         /* if we're not able to send back a list of errors, just quit now */
35215         if (perr_list == NULL) {
35216           rval = FALSE;
35217         }
35218       } else if (err_list == NULL) {
35219         /* if we've already found errors, don't bother collecting more fields */
35220         field = FeatureFieldNew ();
35221         field->type = feat_type;
35222         field->field = ValNodeNew (NULL);
35223         field->field->choice = FeatQualChoice_legal_qual;
35224         field->field->data.intvalue = featqual;
35225         t = TabColumnConfigNew ();
35226         t->field = ValNodeNew (NULL);
35227         t->field->choice = FieldType_feature_field;
35228         t->field->data.ptrvalue = field;
35229         ValNodeAddPointer (&col_list, 0, t);
35230       }
35231     } else {
35232       featqual = GetFeatQualByName (header_vnp->data.ptrvalue);
35233       if (featqual < 0) {
35234         /* unable to recognize column name */
35235         ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue));
35236         /* if we're not able to send back a list of errors, just quit now */
35237         if (perr_list == NULL) {
35238           rval = FALSE;
35239         }
35240       } else if (err_list == NULL) {
35241         /* if we've already found errors, don't bother collecting more fields */
35242         field = FeatureFieldNew ();
35243         field->type = Macro_feature_type_any;
35244         field->field = ValNodeNew (NULL);
35245         field->field->choice = FeatQualChoice_legal_qual;
35246         field->field->data.intvalue = featqual;
35247         t = TabColumnConfigNew ();
35248         t->field = ValNodeNew (NULL);
35249         t->field->choice = FieldType_feature_field;
35250         t->field->data.ptrvalue = field;
35251         ValNodeAddPointer (&col_list, 0, t);
35252       }
35253     }
35254     header_vnp = header_vnp->next;
35255   }
35256   if (err_list != NULL) {
35257     col_list = TabColumnConfigListFree (col_list);
35258     if (perr_list != NULL) {
35259       *perr_list = err_list;
35260     } else {
35261       err_list = ValNodeFreeData (err_list);
35262     }
35263   }
35264   return col_list;
35265 }
35266 
/* State handed to FindGeneByLocusTagBioseqCallback through its userdata
 * pointer.
 */
typedef struct findgenelocustag {
  /* locus tag passed to SeqMgrGetGeneByLocusTag for each Bioseq */
  CharPtr locus_tag;
  /* result list; the callback appends each gene found as an OBJ_SEQFEAT node */
  ValNodePtr gene_list;
} FindGeneLocusTagData, PNTR FindGeneLocusTagPtr;
35271 
/* Bioseq callback: looks up the gene with the requested locus tag on a
 * nucleotide Bioseq and, when one is found, appends it to the gene list in
 * userdata (a FindGeneLocusTagPtr).  Bioseqs for which ISA_na is false are
 * ignored.
 */
static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  FindGeneLocusTagPtr search = (FindGeneLocusTagPtr) userdata;
  SeqMgrFeatContext   context;
  SeqFeatPtr          match;

  if (bsp == NULL || search == NULL) {
    return;
  }
  if (!ISA_na (bsp->mol)) {
    return;
  }

  match = SeqMgrGetGeneByLocusTag (bsp, search->locus_tag, &context);
  if (match == NULL) {
    return;
  }
  ValNodeAddPointer (&search->gene_list, OBJ_SEQFEAT, match);
}
35289 
35290 
/* State shared by the "collect objects whose text matches" callbacks
 * (dbxref, taxname and Bioseq ID searches) through their userdata pointer.
 */
typedef struct objbymatch {
  /* result list; callbacks append matching objects with choice
   * OBJ_SEQFEAT, OBJ_SEQDESC or OBJ_BIOSEQ */
  ValNodePtr obj_list;
  /* constraint each candidate string is tested against */
  StringConstraintPtr scp;
} ObjByMatchData, PNTR ObjByMatchPtr;
35295 
/* Feature callback: appends sfp to the object list in userdata (an
 * ObjByMatchPtr) when any of its dbxref tags satisfies the string
 * constraint.  A tag with a positive integer id is compared by its decimal
 * text; otherwise the tag's string is compared.  The feature is added at
 * most once.  Nothing is done when the constraint is empty.
 */
static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByMatchPtr search;
  ValNodePtr    xref;
  DbtagPtr      dbt;
  Char          id_text[20];
  CharPtr       candidate;

  if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return;
  search = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (search->scp)) return;

  for (xref = sfp->dbxref; xref != NULL; xref = xref->next) {
    dbt = (DbtagPtr) xref->data.ptrvalue;
    if (dbt == NULL || dbt->tag == NULL) {
      continue;
    }
    if (dbt->tag->id > 0) {
      sprintf (id_text, "%d", dbt->tag->id);
      candidate = id_text;
    } else {
      candidate = dbt->tag->str;
    }
    if (DoesStringMatchConstraint (candidate, search->scp)) {
      /* first hit is enough; record the feature and stop looking */
      ValNodeAddPointer (&(search->obj_list), OBJ_SEQFEAT, sfp);
      return;
    }
  }
}
35326 
35327 
GetFeaturesByDbxref(SeqEntryPtr sep,CharPtr dbxref,Uint1 match_location)35328 static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref, Uint1 match_location)
35329 {
35330   ObjByMatchData d;
35331 
35332   d.scp = StringConstraintNew ();
35333   d.scp->match_text = StringSave (dbxref);
35334   d.scp->match_location = match_location;
35335   d.obj_list = NULL;
35336   VisitFeaturesInSep (sep, &d, GetFeaturesByDbxrefCallback);
35337   d.scp = StringConstraintFree (d.scp);
35338   return d.obj_list;
35339 }
35340 
35341 
/* Descriptor callback: appends a source descriptor to the object list in
 * userdata (an ObjByMatchPtr) when the taxname of its organism satisfies
 * the string constraint.  Other descriptor types and an empty constraint
 * are ignored.
 */
static void GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByMatchPtr search;
  BioSourcePtr  source;

  if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
  search = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (search->scp)) return;

  source = (BioSourcePtr) sdp->data.ptrvalue;
  if (source == NULL || source->org == NULL) return;

  if (DoesStringMatchConstraint (source->org->taxname, search->scp)) {
    ValNodeAddPointer (&(search->obj_list), OBJ_SEQDESC, sdp);
  }
}
35358 
35359 
/* Feature callback: appends a BioSource feature to the object list in
 * userdata (an ObjByMatchPtr) when the taxname of its organism satisfies
 * the string constraint.  Other feature types and an empty constraint are
 * ignored.
 */
static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByMatchPtr search;
  BioSourcePtr  source;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
  search = (ObjByMatchPtr) userdata;

  if (IsStringConstraintEmpty (search->scp)) return;

  source = (BioSourcePtr) sfp->data.value.ptrvalue;
  if (source == NULL || source->org == NULL) return;

  if (DoesStringMatchConstraint (source->org->taxname, search->scp)) {
    ValNodeAddPointer (&(search->obj_list), OBJ_SEQFEAT, sfp);
  }
}
35376 
35377 
GetBioSourcesByTaxName(SeqEntryPtr sep,CharPtr taxname,Uint1 match_location)35378 static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname, Uint1 match_location)
35379 {
35380   ObjByMatchData d;
35381 
35382   d.scp = StringConstraintNew ();
35383   d.scp->match_text = StringSave (taxname);
35384   d.scp->match_location = match_location;
35385   d.obj_list = NULL;
35386   VisitDescriptorsInSep (sep, &d, GetBioSourcesByTaxNameDescriptorCallback);
35387 
35388   VisitFeaturesInSep (sep, &d, GetBioSourcesByTaxNameFeatureCallback);
35389   d.scp = StringConstraintFree (d.scp);
35390   return d.obj_list;
35391 }
35392 
35393 
/* State shared by the source-qualifier search callbacks through their
 * userdata pointer.
 */
typedef struct objbystrinfld {
  /* result list; callbacks append matching objects with choice
   * OBJ_SEQDESC or OBJ_SEQFEAT */
  ValNodePtr obj_list;
  /* field handed to GetFieldValueForObject for each candidate object */
  FieldTypePtr field;
  /* constraint handed to GetFieldValueForObject together with the field */
  StringConstraintPtr scp;
} ObjByStrInFldData, PNTR ObjByStrInFldPtr;
35399 
35400 
/* Descriptor callback: appends a source descriptor to the object list in
 * userdata (an ObjByStrInFldPtr) when GetFieldValueForObject returns a
 * value for the requested field under the string constraint.  The
 * returned value is only used as a yes/no signal and is freed here.
 */
static void GetBioSourcesBySourceQualDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByStrInFldPtr search;
  CharPtr          value;

  if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
  search = (ObjByStrInFldPtr) userdata;

  if (IsStringConstraintEmpty (search->scp)) return;

  value = GetFieldValueForObject (OBJ_SEQDESC, sdp, search->field, search->scp);
  if (value == NULL) return;

  ValNodeAddPointer (&(search->obj_list), OBJ_SEQDESC, sdp);
  MemFree (value);
}
35417 
35418 
/* Feature callback: appends a BioSource feature to the object list in
 * userdata (an ObjByStrInFldPtr) when GetFieldValueForObject returns a
 * value for the requested field under the string constraint.  The
 * returned value is only used as a yes/no signal and is freed here.
 */
static void GetBioSourcesBySourceQualFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByStrInFldPtr search;
  CharPtr          value;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
  search = (ObjByStrInFldPtr) userdata;

  if (IsStringConstraintEmpty (search->scp)) return;

  value = GetFieldValueForObject (OBJ_SEQFEAT, sfp, search->field, search->scp);
  if (value == NULL) return;

  ValNodeAddPointer (&(search->obj_list), OBJ_SEQFEAT, sfp);
  MemFree (value);
}
35435 
35436 
GetBioSourcesBySourceQual(SeqEntryPtr sep,SourceQualChoicePtr q,CharPtr val,Uint1 match_location)35437 static ValNodePtr GetBioSourcesBySourceQual (SeqEntryPtr sep, SourceQualChoicePtr q, CharPtr val, Uint1 match_location)
35438 {
35439   ObjByStrInFldData od;
35440 
35441   od.scp = StringConstraintNew();
35442   od.scp->match_text = StringSave (val);
35443   od.scp->match_location = match_location;
35444   od.obj_list = NULL;
35445   od.field = ValNodeNew (NULL);
35446   od.field->choice = FieldType_source_qual;
35447   od.field->data.ptrvalue = q;
35448 
35449   VisitDescriptorsInSep (sep, &od, GetBioSourcesBySourceQualDescriptorCallback);
35450 
35451   VisitFeaturesInSep (sep, &od, GetBioSourcesBySourceQualFeatureCallback);
35452 
35453   od.field = ValNodeFree (od.field);
35454   od.scp = StringConstraintFree (od.scp);
35455   return od.obj_list;
35456 }
35457 
35458 
/* Bioseq callback: appends bsp (choice OBJ_BIOSEQ) to the object list in
 * data (an ObjByMatchPtr) when one of its IDs satisfies the string
 * constraint.  The ID list is first tested as a whole with
 * DoesSeqIDListMeetStringConstraint.  If that fails, each ID gets a second
 * chance:
 *   - a general ID in database "NCBIFILE" matches on its full tag string,
 *     or on the part of the tag string before the last '/';
 *   - a local ID starting with "bankit" (any case) matches on the text
 *     after that prefix.
 */
static void GetBioseqsByIdCallback (BioseqPtr bsp, Pointer data)
{
  ObjByMatchPtr search = (ObjByMatchPtr) data;
  SeqIdPtr      sip;
  DbtagPtr      dbtag;
  ObjectIdPtr   oip;
  CharPtr       file_id, slash, prefix;
  Int4          prefix_len;
  Boolean       matched;

  if (bsp == NULL || search == NULL) {
    return;
  }

  matched = DoesSeqIDListMeetStringConstraint (bsp->id, search->scp);

  for (sip = bsp->id; sip != NULL && !matched; sip = sip->next) {
    if (sip->choice == SEQID_GENERAL) {
      dbtag = (DbtagPtr) sip->data.ptrvalue;
      if (dbtag == NULL || dbtag->tag == NULL
          || StringCmp (dbtag->db, "NCBIFILE") != 0) {
        continue;
      }
      file_id = dbtag->tag->str;
      if (DoesStringMatchConstraint (file_id, search->scp)) {
        matched = TRUE;
        continue;
      }
      slash = StringRChr (file_id, '/');
      if (slash == NULL) {
        continue;
      }
      /* retry with only the text in front of the last '/' */
      prefix_len = slash - file_id;
      prefix = (CharPtr) MemNew (sizeof (Char) * (prefix_len + 1));
      StringNCpy (prefix, file_id, prefix_len);
      prefix[prefix_len] = 0;
      if (DoesStringMatchConstraint (prefix, search->scp)) {
        matched = TRUE;
      }
      MemFree (prefix);
    } else if (sip->choice == SEQID_LOCAL) {
      oip = sip->data.ptrvalue;
      if (oip != NULL
          && StringNICmp (oip->str, "bankit", 6) == 0
          && DoesStringMatchConstraint (oip->str + 6, search->scp)) {
        matched = TRUE;
      }
    }
  }

  if (matched) {
    ValNodeAddPointer (&(search->obj_list), OBJ_BIOSEQ, bsp);
  }
}
35500 
35501 
FindBioseqsByMatchType(SeqEntryPtr sep,Uint1 match_location,CharPtr match_str)35502 static ValNodePtr FindBioseqsByMatchType (SeqEntryPtr sep, Uint1 match_location, CharPtr match_str)
35503 {
35504   ObjByMatchData d;
35505 
35506   if (sep == NULL || StringHasNoText (match_str)) {
35507     return NULL;
35508   }
35509   d.scp = StringConstraintNew ();
35510   d.scp->match_text = StringSave (match_str);
35511   d.scp->match_location = match_location;
35512   d.obj_list = NULL;
35513   VisitBioseqsInSep (sep, &d, GetBioseqsByIdCallback);
35514   d.scp = StringConstraintFree (d.scp);
35515   return d.obj_list;
35516 }
35517 
/* One search key for a Bioseq: either a string or a number that identifies
 * the Bioseq it points to.
 */
typedef struct bioseqsearchitem {
  /* Bioseq this key refers to */
  BioseqPtr bsp;
  /* string form of the key; left unset by BioseqSearchItemNewInt */
  CharPtr   str;
  /* numeric form of the key; BioseqSearchItemNewStr fills it in when str
   * consists only of digits */
  Int4      num;
  /* TRUE when BioseqSearchItemFree must free str */
  Boolean   free_str;
} BioseqSearchItemData, PNTR BioseqSearchItemPtr;
35524 
BioseqSearchItemNewStr(BioseqPtr bsp,CharPtr str,Boolean need_free)35525 static BioseqSearchItemPtr BioseqSearchItemNewStr (BioseqPtr bsp, CharPtr str, Boolean need_free)
35526 {
35527   BioseqSearchItemPtr bsi;
35528 
35529   bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData));
35530   bsi->bsp = bsp;
35531   bsi->str = str;
35532   bsi->free_str = need_free;
35533   if (StringIsAllDigits (bsi->str)) {
35534     bsi->num = atoi (bsi->str);
35535   }
35536   return bsi;
35537 }
35538 
35539 
BioseqSearchItemNewInt(BioseqPtr bsp,Int4 num)35540 static BioseqSearchItemPtr BioseqSearchItemNewInt (BioseqPtr bsp, Int4 num)
35541 {
35542   BioseqSearchItemPtr bsi;
35543 
35544   bsi = (BioseqSearchItemPtr) MemNew (sizeof (BioseqSearchItemData));
35545   bsi->bsp = bsp;
35546   bsi->num = num;
35547   bsi->free_str = FALSE;
35548   return bsi;
35549 }
35550 
35551 
/* Frees a search item, and its string too when the item was created with
 * free_str set.  A NULL argument is returned unchanged; otherwise the
 * result of MemFree is returned so callers can assign it back.
 */
static BioseqSearchItemPtr BioseqSearchItemFree (BioseqSearchItemPtr bsi)
{
  if (bsi == NULL) {
    return NULL;
  }

  if (bsi->free_str) {
    MemFree (bsi->str);
  }
  return MemFree (bsi);
}
35562 
35563 
/* Frees every node of a search item list along with the BioseqSearchItem
 * each node points to.  Always returns NULL so the result can be assigned
 * back to the caller's list pointer.
 */
static ValNodePtr BioseqSearchItemListFree (ValNodePtr vnp)
{
  ValNodePtr node, following;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    /* unlink first so that ValNodeFree releases this node only */
    node->next = NULL;
    node->data.ptrvalue = BioseqSearchItemFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return NULL;
}
35577 
35578 
/* Ordering function for search items.
 *   - NULL items sort after non-NULL items; two NULLs are equal.
 *   - Two items with a positive num compare by num.
 *   - An item with a positive num sorts after one without.
 *   - Otherwise the items compare by str, ignoring case.
 */
static int CompareBioseqSearchItem (BioseqSearchItemPtr b1, BioseqSearchItemPtr b2)
{
  Boolean numeric1, numeric2;

  if (b1 == NULL || b2 == NULL) {
    if (b1 == b2) {
      return 0;
    }
    return (b1 == NULL) ? 1 : -1;
  }

  numeric1 = (Boolean) (b1->num > 0);
  numeric2 = (Boolean) (b2->num > 0);

  if (numeric1 && numeric2) {
    if (b1->num < b2->num) {
      return -1;
    }
    return (b1->num == b2->num) ? 0 : 1;
  }
  if (numeric1) {
    return 1;
  }
  if (numeric2) {
    return -1;
  }
  return StringICmp (b1->str, b2->str);
}
35603 
35604 
SortVnpByBioseqSearchItem(VoidPtr ptr1,VoidPtr ptr2)35605 static int LIBCALLBACK SortVnpByBioseqSearchItem (VoidPtr ptr1, VoidPtr ptr2)
35606 
35607 {
35608   ValNodePtr  vnp1;
35609   ValNodePtr  vnp2;
35610 
35611   if (ptr1 == NULL || ptr2 == NULL) return 0;
35612   vnp1 = *((ValNodePtr PNTR) ptr1);
35613   vnp2 = *((ValNodePtr PNTR) ptr2);
35614   if (vnp1 == NULL || vnp2 == NULL) return 0;
35615 
35616   return CompareBioseqSearchItem(vnp1->data.ptrvalue, vnp2->data.ptrvalue);
35617 }
35618 
35619 
InitValNodeBlock(ValNodeBlockPtr vnbp,ValNodePtr list)35620 NLM_EXTERN void InitValNodeBlock (ValNodeBlockPtr vnbp, ValNodePtr list)
35621 {
35622   vnbp->head = list;
35623   vnbp->tail = list;
35624   if (vnbp->tail != NULL) {
35625     while (vnbp->tail->next != NULL) {
35626       vnbp->tail = vnbp->tail->next;
35627     }
35628   }
35629 }
35630 
35631 
ValNodeAddPointerToEnd(ValNodeBlockPtr vnbp,Uint1 choice,Pointer data)35632 NLM_EXTERN void ValNodeAddPointerToEnd (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data)
35633 {
35634   ValNodePtr vnp_new;
35635 
35636   vnp_new = ValNodeAddPointer (&(vnbp->tail), choice, data);
35637   if (vnbp->head == NULL) {
35638     vnbp->head = vnp_new;
35639   }
35640   vnbp->tail = vnp_new;
35641 }
35642 
35643 
ValNodeAddPointerToFront(ValNodeBlockPtr vnbp,Uint1 choice,Pointer data)35644 NLM_EXTERN void ValNodeAddPointerToFront (ValNodeBlockPtr vnbp, Uint1 choice, Pointer data)
35645 {
35646   ValNodePtr vnp;
35647 
35648   vnp = ValNodeNew (NULL);
35649   vnp->choice = choice;
35650   vnp->data.ptrvalue = data;
35651   vnp->next = vnbp->head;
35652   vnbp->head = vnp;
35653 }
35654 
35655 
ValNodeLinkToEnd(ValNodeBlockPtr vnbp,ValNodePtr list)35656 NLM_EXTERN void ValNodeLinkToEnd (ValNodeBlockPtr vnbp, ValNodePtr list)
35657 {
35658   if (list == NULL) {
35659     return;
35660   } else if (vnbp->head == NULL) {
35661     vnbp->head = list;
35662     vnbp->tail = list;
35663     while (vnbp->tail->next != NULL) {
35664       vnbp->tail = vnbp->tail->next;
35665     }
35666   } else {
35667     vnbp->tail->next = list;
35668     while (vnbp->tail->next != NULL) {
35669       vnbp->tail = vnbp->tail->next;
35670     }
35671   }
35672 }
35673 
35674 
ValNodeSortBlock(ValNodeBlockPtr vnbp,int (LIBCALLBACK * compar)PROTO ((Nlm_VoidPtr,Nlm_VoidPtr)))35675 NLM_EXTERN void ValNodeSortBlock (ValNodeBlockPtr vnbp, int (LIBCALLBACK *compar )PROTO ((Nlm_VoidPtr, Nlm_VoidPtr )))
35676 {
35677   if (vnbp == NULL || vnbp->head == NULL) {
35678     return;
35679   }
35680   vnbp->head = ValNodeSort(vnbp->head, compar);
35681   vnbp->tail = vnbp->head;
35682   while (vnbp->tail->next != NULL) {
35683     vnbp->tail = vnbp->tail->next;
35684   }
35685 }
35686 
35687 
/* Returns the first ID in the list whose choice is SEQID_LOCAL, or NULL
 * when the list has none.
 */
static SeqIdPtr FindLocalId (SeqIdPtr list)
{
  SeqIdPtr sip;

  for (sip = list; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_LOCAL) {
      return sip;
    }
  }
  return NULL;
}
35695 
35696 
/* Appends to block one BioseqSearchItem for every string (or number) under
 * which bsp might be referred to, derived from each ID in sip_list:
 *   - the PRINTID_REPORT label, when it differs from the FASTA label;
 *   - the PRINTID_FASTA_LONG label without a trailing '|', and the same
 *     label without its leading "xxx|" part;
 *   - for text-accession IDs, the bare accession when a version, name or
 *     release is also present;
 *   - for general IDs in database "BankIt": "BankIt<tag>", the bare tag,
 *     and a variant with the last '_' replaced by '/';
 *   - for general IDs in database "NCBIFILE": the whole tag, the part after
 *     the last '/', and the file-name part in front of it;
 *   - for any other general ID: the tag string.
 *
 * Items created with need_free TRUE own their string.  Items created with
 * FALSE point into memory owned by another item or by the ID itself.
 */
static void BuildIdStringsListForIdList (SeqIdPtr sip_list, BioseqPtr bsp, ValNodeBlockPtr block)
{
  SeqIdPtr     sip, sip_next, local;
  CharPtr      id, cp, tmp, tag_str;
  DbtagPtr     dbtag;
  ObjectIdPtr  oid;
  Int4         len;
  TextSeqIdPtr tsip;

  for (sip = sip_list; sip != NULL; sip = sip->next) {
    /* detach the ID while labelling it, presumably so that SeqIdWholeLabel
     * sees this one ID only; the link is restored right after */
    sip_next = sip->next;
    sip->next = NULL;
    id = SeqIdWholeLabel (sip, PRINTID_FASTA_LONG);
    tmp = SeqIdWholeLabel (sip, PRINTID_REPORT);
    if (StringCmp (id, tmp) != 0) {
      ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE));
      tmp = NULL;
    } else {
      tmp = MemFree (tmp);
    }
    sip->next = sip_next;
    if (id == NULL) {
      continue;
    }

    /* remove terminating pipe character */
    len = (Int4) StringLen (id);
    if (len > 0 && id[len - 1] == '|') {
      id[len - 1] = 0;
    }
    ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));

    /* remove leading pipe identifier; this item shares the buffer owned by
     * the item added just above */
    cp = StringChr (id, '|');
    if (cp != NULL) {
      ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE));
    }

    if ((sip->choice == SEQID_GENBANK
        || sip->choice == SEQID_EMBL
        || sip->choice == SEQID_DDBJ
        || sip->choice == SEQID_TPG
        || sip->choice == SEQID_TPE
        || sip->choice == SEQID_TPD
        || sip->choice == SEQID_PIR
        || sip->choice == SEQID_SWISSPROT)
        && (tsip = (TextSeqIdPtr)sip->data.ptrvalue) != NULL) {
      /* try just accession, if version and/or name and/or release supplied */
      if (!StringHasNoText (tsip->accession)
          && (tsip->version > 0 || !StringHasNoText (tsip->name) || !StringHasNoText (tsip->release))) {
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tsip->accession, FALSE));
      }
    }

    if (sip->choice != SEQID_GENERAL
        || (dbtag = (DbtagPtr) sip->data.ptrvalue) == NULL) {
      continue;
    }

    if (StringCmp (dbtag->db, "BankIt") == 0) {
      /* just bankit number */
      if (dbtag->tag == NULL) {
        continue;
      }
      id = NULL;
      if (dbtag->tag->id > 0) {
        id = (CharPtr) MemNew (sizeof (Char) * 22);
        sprintf (id, "BankIt%d", dbtag->tag->id);
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewInt (bsp, dbtag->tag->id));
      } else if (dbtag->tag->str != NULL) {
        id = (CharPtr) MemNew (sizeof (Char) * (8 + StringLen (dbtag->tag->str)));
        sprintf (id, "BankIt%s", dbtag->tag->str);
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
      }
      /* also look for BankIt id with forward slash instead of _ */
      if (id != NULL && (cp = StringRChr (id, '_')) != NULL) {
        len = cp - id;
        tmp = StringSave (id);
        tmp[len] = '/';
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tmp, TRUE));
      }
    } else if (StringCmp (dbtag->db, "NCBIFILE") == 0 && dbtag->tag != NULL) {
      tag_str = dbtag->tag->str;
      ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, tag_str, FALSE));
      if ((local = FindLocalId(bsp->id)) != NULL
          && (oid = (ObjectIdPtr) local->data.ptrvalue) != NULL
          && oid->str != NULL
          && (cp = StringSearch (tag_str, oid->str)) == tag_str + StringLen (tag_str) - StringLen (oid->str)) {
        /* file ID already ends with local ID, don't need to add twice, but do add file name */
        if (cp > tag_str) {
          /* cp is the start of the local ID; the character before it is
           * the separator, so the file name is one character shorter */
          id = (CharPtr) MemNew (sizeof (Char) * (cp - tag_str));
          StringNCpy (id, tag_str, cp - tag_str - 1);
          ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
        }
      } else if ((cp = StringRChr (tag_str, '/')) != NULL) {
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, cp + 1, FALSE));
        /* also add string for just file name */
        if (cp > tag_str) {
          /* cp is the '/' itself, so the file name is exactly
           * cp - tag_str characters long */
          id = (CharPtr) MemNew (sizeof (Char) * (cp - tag_str + 1));
          StringNCpy (id, tag_str, cp - tag_str);
          ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, id, TRUE));
        }
      }
    } else if (dbtag->tag != NULL) {
      /* for all other types, also use just the string or int part */
      if (dbtag->tag->str != NULL) {
        ValNodeAddPointerToEnd (block, 0, BioseqSearchItemNewStr (bsp, dbtag->tag->str, FALSE));
      }
    }
  }
}
35801 
35802 
/* Bioseq callback: adds the search strings for all IDs of bsp to the
 * ValNodeBlock passed in data.  A NULL Bioseq is ignored.
 */
static void BuildIDStringsListCallback (BioseqPtr bsp, Pointer data)
{
  ValNodeBlockPtr block = (ValNodeBlockPtr) data;

  if (bsp == NULL) {
    return;
  }
  BuildIdStringsListForIdList (bsp->id, bsp, block);
}
35809 
35810 
/* For every "BankIt.../..." search string (not "BankIt|...") whose text
 * before the last '/' is not shared with a neighbouring entry in sorted
 * order, appends two extra search items to list: the string truncated at
 * the last '/', and that same truncated string without its first six
 * characters (the "BankIt" prefix).
 */
static void AddBankItSingletons (ValNodeBlockPtr list)
{
  ValNodePtr          candidates = NULL;
  ValNodePtr          cur, scan;
  BioseqSearchItemPtr first, other;
  CharPtr             slash, truncated = NULL;
  Int4                prefix_len, other_len;

  /* collect the BankIt entries that contain a slash */
  for (cur = list->head; cur != NULL; cur = cur->next) {
    first = (BioseqSearchItemPtr) cur->data.ptrvalue;
    if (first == NULL) {
      continue;
    }
    if (StringNICmp (first->str, "BankIt", 6) != 0) {
      continue;
    }
    if (first->str[6] == '|') {
      continue;
    }
    if (StringChr (first->str, '/') == NULL) {
      continue;
    }
    ValNodeAddPointer (&candidates, 0, first);
  }
  candidates = ValNodeSort (candidates, SortVnpByBioseqSearchItem);

  cur = candidates;
  while (cur != NULL) {
    first = (BioseqSearchItemPtr) cur->data.ptrvalue;
    if (cur->next != NULL) {
      other = cur->next->data.ptrvalue;
      slash = StringRChr (first->str, '/');
      prefix_len = slash - first->str;
      slash = StringRChr (other->str, '/');
      other_len = slash - other->str;
      if (prefix_len == other_len
          && StringNICmp (first->str, other->str, prefix_len) == 0) {
        /* prefix is shared: skip the whole run of entries that share it */
        scan = cur->next->next;
        while (scan != NULL) {
          other = (BioseqSearchItemPtr) scan->data.ptrvalue;
          if (other == NULL) {
            break;
          }
          slash = StringRChr (other->str, '/');
          if (slash == NULL) {
            break;
          }
          other_len = slash - other->str;
          if (other_len != prefix_len) {
            break;
          }
          if (StringNICmp (first->str, other->str, prefix_len) != 0) {
            break;
          }
          scan = scan->next;
        }
        cur = scan;
        continue;
      }
    }

    /* prefix is unique: add the truncated forms */
    truncated = StringSave (first->str);
    slash = StringRChr (truncated, '/');
    if (slash != NULL) {
      *slash = 0;
    }
    ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (first->bsp, truncated, TRUE));
    /* also add string without BankIt */
    ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (first->bsp, truncated + 6, FALSE));
    cur = cur->next;
  }
  candidates = ValNodeFree (candidates);
}
35865 
35866 
/* For every "lcl|..." search string containing "__" whose text before the
 * first "__" is not shared with a neighbouring entry in sorted order,
 * appends one extra search item to list: the string truncated at the
 * first "__".
 */
static void AddUnderscoreSingletons (ValNodeBlockPtr list)
{
  ValNodePtr          candidates = NULL;
  ValNodePtr          cur, scan;
  BioseqSearchItemPtr first, other;
  CharPtr             marker, truncated = NULL;
  Int4                prefix_len, other_len;

  /* collect the local-ID entries that contain a double underscore */
  for (cur = list->head; cur != NULL; cur = cur->next) {
    first = (BioseqSearchItemPtr) cur->data.ptrvalue;
    if (first == NULL) {
      continue;
    }
    if (StringNICmp (first->str, "lcl|", 4) != 0) {
      continue;
    }
    if (StringSearch (first->str, "__") == NULL) {
      continue;
    }
    ValNodeAddPointer (&candidates, 0, first);
  }
  candidates = ValNodeSort (candidates, SortVnpByBioseqSearchItem);

  cur = candidates;
  while (cur != NULL) {
    first = (BioseqSearchItemPtr) cur->data.ptrvalue;
    if (cur->next != NULL) {
      other = cur->next->data.ptrvalue;
      marker = StringSearch (first->str, "__");
      prefix_len = marker - first->str;
      marker = StringSearch (other->str, "__");
      other_len = marker - other->str;
      if (prefix_len == other_len
          && StringNICmp (first->str, other->str, prefix_len) == 0) {
        /* prefix is shared: skip the whole run of entries that share it */
        scan = cur->next->next;
        while (scan != NULL) {
          other = (BioseqSearchItemPtr) scan->data.ptrvalue;
          if (other == NULL) {
            break;
          }
          marker = StringSearch (other->str, "__");
          if (marker == NULL) {
            break;
          }
          other_len = marker - other->str;
          if (other_len != prefix_len) {
            break;
          }
          if (StringNICmp (first->str, other->str, prefix_len) != 0) {
            break;
          }
          scan = scan->next;
        }
        cur = scan;
        continue;
      }
    }

    /* prefix is unique: add the truncated form */
    truncated = StringSave (first->str);
    marker = StringSearch (truncated, "__");
    if (marker != NULL) {
      *marker = 0;
    }
    ValNodeAddPointerToEnd (list, 0, BioseqSearchItemNewStr (first->bsp, truncated, TRUE));
    cur = cur->next;
  }
  candidates = ValNodeFree (candidates);
}
35918 
35919 
/* Lookup table of Bioseq search items, filled from a list sorted with
 * SortVnpByBioseqSearchItem.  The lookup code treats items[0 .. num_str-1]
 * as the string-keyed entries and items[num_str .. num_total-1] as the
 * integer-keyed entries (first are str, second are int).
 */
typedef struct bioseqsearchindex {
  Int4 num_str;                    /* number of items whose num is not > 0 */
  Int4 num_int;                    /* number of items whose num is > 0 */
  Int4 num_total;                  /* num_str + num_int */
  BioseqSearchItemPtr PNTR items;  /* array of num_total item pointers */
} BioseqSearchIndexData, PNTR BioseqSearchIndexPtr;
35927 
35928 
/* Frees every item in the index, the items array and the index itself.
 * Returns the result of the final MemFree, or the NULL argument unchanged,
 * so callers can write index = BioseqSearchIndexFree (index).
 */
static BioseqSearchIndexPtr BioseqSearchIndexFree (BioseqSearchIndexPtr index)
{
  Int4 pos;

  if (index == NULL) {
    return index;
  }

  pos = 0;
  while (pos < index->num_total) {
    index->items[pos] = BioseqSearchItemFree (index->items[pos]);
    pos++;
  }
  index->items = MemFree (index->items);
  index = MemFree (index);
  return index;
}
35942 
35943 
BuildIDStringsList(SeqEntryPtr sep)35944 static BioseqSearchIndexPtr BuildIDStringsList (SeqEntryPtr sep)
35945 {
35946   ValNodeBlock vnb;
35947   ValNodePtr list = NULL, vnp;
35948   Int4       num_total, i;
35949   BioseqSearchIndexPtr index;
35950 
35951   vnb.head = NULL;
35952   vnb.tail = NULL;
35953 
35954   VisitBioseqsInSep (sep, &vnb, BuildIDStringsListCallback);
35955   AddBankItSingletons(&vnb);
35956   AddUnderscoreSingletons(&vnb);
35957   list = vnb.head;
35958   list = ValNodeSort (list, SortVnpByBioseqSearchItem);
35959 
35960   num_total = ValNodeLen (list);
35961 
35962   index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData));
35963   index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total);
35964   for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) {
35965     index->items[i] = vnp->data.ptrvalue;
35966     vnp->data.ptrvalue = NULL;
35967     if (index->items[i]->num > 0) {
35968       index->num_int++;
35969     } else {
35970       index->num_str++;
35971     }
35972   }
35973   index->num_total = index->num_int + index->num_str;
35974   list = ValNodeFree (list);
35975 
35976   return index;
35977 }
35978 
35979 
BuildIDStringsListForBioseqList(ValNodePtr bsp_list)35980 static BioseqSearchIndexPtr BuildIDStringsListForBioseqList (ValNodePtr bsp_list)
35981 {
35982   ValNodeBlock vnb;
35983   ValNodePtr list = NULL, vnp;
35984   Int4       num_total, i;
35985   BioseqSearchIndexPtr index;
35986 
35987   vnb.head = NULL;
35988   vnb.tail = NULL;
35989 
35990   for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
35991     BuildIDStringsListCallback (vnp->data.ptrvalue, &vnb);
35992   }
35993   AddBankItSingletons(&vnb);
35994   AddUnderscoreSingletons(&vnb);
35995   list = vnb.head;
35996   list = ValNodeSort (list, SortVnpByBioseqSearchItem);
35997 
35998   num_total = ValNodeLen (list);
35999 
36000   index = (BioseqSearchIndexPtr) MemNew (sizeof (BioseqSearchIndexData));
36001   index->items = (BioseqSearchItemPtr PNTR) MemNew (sizeof (BioseqSearchItemPtr) * num_total);
36002   for (vnp = list, i = 0; vnp != NULL && i < num_total; vnp = vnp->next, i++) {
36003     index->items[i] = vnp->data.ptrvalue;
36004     vnp->data.ptrvalue = NULL;
36005     if (index->items[i]->num > 0) {
36006       index->num_int++;
36007     } else {
36008       index->num_str++;
36009     }
36010   }
36011   index->num_total = index->num_int + index->num_str;
36012   list = ValNodeFree (list);
36013 
36014   return index;
36015 }
36016 
36017 
FindStringInIdListIndex(CharPtr str,BioseqSearchIndexPtr index)36018 static BioseqPtr FindStringInIdListIndex (CharPtr str, BioseqSearchIndexPtr index)
36019 {
36020   CharPtr    tmp;
36021   Int4       match, imax, imin, i, j;
36022   Int4       num = -1;
36023 
36024   if (index == NULL) {
36025     return NULL;
36026   }
36027   if (StringIsAllDigits (str)) {
36028     match = atoi (str);
36029     imax = index->num_total - 1;
36030     imin = index->num_str;
36031     while (imax >= imin)
36032     {
36033         i = (imax + imin)/2;
36034         if (index->items[i]->num > match)
36035             imax = i - 1;
36036         else if (index->items[i]->num < match)
36037             imin = i + 1;
36038         else
36039         {
36040             num = i;
36041             break;
36042         }
36043     }
36044 
36045   } else {
36046     imax = index->num_str - 1;
36047     imin = 0;
36048     while (imax >= imin)
36049     {
36050         i = (imax + imin)/2;
36051         tmp = index->items[i]->str;
36052         if ((j = StringICmp(tmp, str)) > 0)
36053             imax = i - 1;
36054         else if (j < 0)
36055             imin = i + 1;
36056         else
36057         {
36058             num = i;
36059             break;
36060         }
36061     }
36062   }
36063 
36064   if (num > -1) {
36065     return index->items[num]->bsp;
36066   } else {
36067     return NULL;
36068   }
36069 }
36070 
36071 
FindListInIdListIndex(Uint1 match_location,CharPtr match_str,BioseqSearchIndexPtr index)36072 static ValNodePtr FindListInIdListIndex (Uint1 match_location, CharPtr match_str, BioseqSearchIndexPtr index)
36073 {
36074   Int4 i;
36075   ValNodePtr list = NULL;
36076   StringConstraintPtr scp;
36077   Char buf[5000];
36078 
36079   if (StringHasNoText (match_str) || index == NULL) {
36080     return NULL;
36081   }
36082   scp = StringConstraintNew ();
36083   scp->match_text = StringSave (match_str);
36084   scp->match_location = match_location;
36085   for (i = 0; i < index->num_str; i++) {
36086     if (DoesStringMatchConstraint (index->items[i]->str, scp)) {
36087       ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
36088     }
36089   }
36090   for (i = index->num_str; i < index->num_str + index->num_int; i++) {
36091     sprintf (buf, "%u", index->items[i]->num);
36092     if (DoesStringMatchConstraint (buf, scp)) {
36093       ValNodeAddPointer (&list, OBJ_BIOSEQ, index->items[i]->bsp);
36094     }
36095   }
36096   scp = StringConstraintFree (scp);
36097 
36098   list = ValNodeSort (list, SortVnpByChoiceAndPtrvalue);
36099   ValNodeUnique (&list, SortVnpByChoiceAndPtrvalue, ValNodeFree);
36100   return list;
36101 }
36102 
36103 
36104 
36105 /* J. Chen */
/* State passed through the feature visitor when matching Bioseqs by
 * protein name (see GetBioseqByProteinName / FindBioseqByProteinName).
 */
typedef struct bioseqbymatch {

  BioseqPtr bsp;        /* candidate Bioseq; reset to NULL by the callback when it does not qualify */
  CharPtr   match_str;  /* protein name being searched for */
} BioseqByMatch, PNTR BioseqByMatchPtr;
36111 
36112 
36113 
36114 /* J. Chen */
/* Feature visitor callback used by FindBioseqByProteinName.
 * userdata is a BioseqByMatchPtr whose bsp field holds the candidate Bioseq.
 * The candidate is cleared (bsp set to NULL) when the visited feature is
 * not a protein feature with a ProtRef, or when any of the ProtRef's names
 * differs from match_str.
 *
 * strcmp must not be given NULL, so a NULL name or a NULL match_str is
 * treated as a mismatch instead of being passed through.  A NULL userdata
 * is ignored.
 */
static void GetBioseqByProteinName(SeqFeatPtr sfp, Pointer userdata)
{
  BioseqByMatchPtr bsp_m_p;
  ProtRefPtr       prp;
  ValNodePtr       name;
  const char      *name_str;

  bsp_m_p = (BioseqByMatchPtr) userdata;
  if (bsp_m_p == NULL)
  {
     return;
  }
  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT
                  || (prp = (ProtRefPtr) sfp->data.value.ptrvalue) == NULL)
  {
     bsp_m_p->bsp = NULL;
     return;
  }

  /* once the candidate has been rejected there is nothing left to decide */
  for (name = prp->name; name != NULL && bsp_m_p->bsp != NULL; name = name->next)
  {
     name_str = (const char *) name->data.ptrvalue;
     if (name_str == NULL || bsp_m_p->match_str == NULL
         || strcmp (name_str, bsp_m_p->match_str) != 0)
     {
        bsp_m_p->bsp = NULL;
     }
  }

}  /* GetBioseqByProteinName */
36133 
36134 
36135 
36136 /* J. Chen */
/* Walks sep recursively.  For each Bioseq entry the Bioseq is stored in
 * bsp_m->bsp, its features are visited with GetBioseqByProteinName, and if
 * bsp_m->bsp is still non-NULL afterwards the Bioseq is appended to
 * *match_list with choice OBJ_BIOSEQ.  For a Bioseq-set entry every member
 * of seq_set is processed the same way.
 */
static void FindBioseqByProteinName(SeqEntryPtr sep, BioseqByMatchPtr bsp_m, ValNodePtr *match_list)
{
   SeqEntryPtr  member;
   BioseqSetPtr bssp;

   if (IS_Bioseq (sep)) {
       bsp_m->bsp = (BioseqPtr) sep->data.ptrvalue;
       VisitFeaturesInSep (sep, bsp_m, GetBioseqByProteinName);
       if (bsp_m->bsp != NULL) {
           ValNodeAddPointer (match_list, OBJ_BIOSEQ, bsp_m->bsp);
       }
       return;
   }

   if (!IS_Bioseq_set (sep)) {
       return;
   }

   bssp = (BioseqSetPtr) sep->data.ptrvalue;
   member = bssp->seq_set;
   while (member != NULL) {
       FindBioseqByProteinName (member, bsp_m, match_list);
       member = member->next;
   }
}   /* FindBioseqByProteinName  */
36152 
36153 
/* Bioseq visitor callback: appends every non-protein Bioseq to the
 * ValNode list whose address is passed through data (choice OBJ_BIOSEQ).
 */
static void GetAllBioseqsCallback (BioseqPtr bsp, Pointer data)
{
  ValNodePtr PNTR list_ptr = (ValNodePtr PNTR) data;

  if (bsp == NULL || ISA_aa (bsp->mol)) {
    return;
  }
  ValNodeAddPointer (list_ptr, OBJ_BIOSEQ, bsp);
}
36160 
36161 
/* Pairs a string to match with the list of objects collected for it
 * (used by the BioProject lookup callbacks).
 */
typedef struct stringlist {
  CharPtr str;      /* string to compare against */
  ValNodePtr list;  /* matches collected so far */
} StringListData, PNTR StringListPtr;
36166 
36167 
/* Bioseq visitor callback: data is a StringListPtr.  For a non-protein
 * Bioseq, fetches its BioProject ID and, when it equals data->str
 * (case-insensitive), appends the Bioseq to data->list with choice
 * OBJ_BIOSEQ.  The fetched ID string is freed before returning.
 */
static void GetBioseqsByBioProjectCallback(BioseqPtr bsp, Pointer data)
{
  StringListPtr wanted;
  CharPtr       project_id;

  if (bsp == NULL || ISA_aa (bsp->mol)) {
    return;
  }
  wanted = (StringListPtr) data;
  if (wanted == NULL) {
    return;
  }

  project_id = GetBioProjectIdFromBioseq (bsp, NULL);
  if (StringICmp (project_id, wanted->str) == 0) {
    ValNodeAddPointer (&(wanted->list), OBJ_BIOSEQ, bsp);
  }
  project_id = MemFree (project_id);
}
36182 
36183 
GetBioseqsByBioProject(SeqEntryPtr sep,CharPtr match_str)36184 static ValNodePtr GetBioseqsByBioProject (SeqEntryPtr sep, CharPtr match_str)
36185 {
36186   StringListData s;
36187 
36188   MemSet (&s, 0, sizeof (StringListData));
36189   s.str = match_str;
36190 
36191   VisitBioseqsInSep (sep, &s, GetBioseqsByBioProjectCallback);
36192   return s.list;
36193 }
36194 
36195 
36196 static ValNodePtr
FindMatchForRowEx(MatchTypePtr match_type,CharPtr match_str,Uint2 entityID,SeqEntryPtr sep,BioseqSearchIndexPtr index)36197 FindMatchForRowEx
36198 (MatchTypePtr match_type,
36199  CharPtr      match_str,
36200  Uint2        entityID,
36201  SeqEntryPtr  sep,
36202  BioseqSearchIndexPtr index
36203  )
36204 {
36205   ValNodePtr match_list = NULL;
36206   FindGeneLocusTagData fd;
36207   SeqFeatPtr           sfp;
36208   SeqMgrFeatContext    fcontext;
36209   BioseqPtr            bsp;
36210   BioseqByMatch        bsp_m;
36211 
36212   if (match_type == NULL || sep == NULL) return NULL;
36213 
36214   switch (match_type->choice) {
36215     case eTableMatchProteinName:   /* J. Chen */
36216         bsp_m.match_str = match_str;
36217         FindBioseqByProteinName(sep, &bsp_m, &match_list);
36218       break;
36219     case eTableMatchFeatureID:
36220       sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_str, NULL, &fcontext);
36221       if (sfp != NULL) {
36222         ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp);
36223       }
36224       break;
36225     case eTableMatchGeneLocusTag:
36226       fd.locus_tag = match_str;
36227       fd.gene_list = NULL;
36228       VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback);
36229       ValNodeLink (&match_list, fd.gene_list);
36230       break;
36231     case eTableMatchProteinID:
36232     case eTableMatchNucID:
36233       if (match_type->match_location == String_location_equals && index != NULL) {
36234         bsp = FindStringInIdListIndex (match_str, index);
36235         if (bsp != NULL) {
36236           ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp);
36237         }
36238       } else if (index != NULL) {
36239         ValNodeLink (&match_list, FindListInIdListIndex (match_type->match_location, match_str, index));
36240       } else {
36241         ValNodeLink (&match_list, FindBioseqsByMatchType (sep, match_type->match_location, match_str));
36242       }
36243       break;
36244     case eTableMatchDbxref:
36245       match_list = GetFeaturesByDbxref (sep, match_str, match_type->match_location);
36246       break;
36247     case eTableMatchBioSource:
36248       match_list = GetBioSourcesByTaxName (sep, match_str, match_type->match_location);
36249       break;
36250     case eTableMatchSourceQual:
36251       match_list = GetBioSourcesBySourceQual (sep, match_type->data, match_str, match_type->match_location);
36252       break;
36253     case eTableMatchAny:
36254       VisitBioseqsInSep (sep, &match_list, GetAllBioseqsCallback);
36255       break;
36256     case eTableMatchBioProject:
36257       match_list = GetBioseqsByBioProject(sep, match_str);
36258       break;
36259   }
36260   return match_list;
36261 }
36262 
36263 
36264 static ValNodePtr
FindMatchForRow(MatchTypePtr match_type,CharPtr match_str,Uint2 entityID,SeqEntryPtr sep)36265 FindMatchForRow
36266 (MatchTypePtr match_type,
36267  CharPtr      match_str,
36268  Uint2        entityID,
36269  SeqEntryPtr  sep
36270  )
36271 {
36272   return FindMatchForRowEx (match_type, match_str, entityID, sep, NULL);
36273 }
36274 
36275 
GetFeatureListForProteinBioseq(Uint1 featdef,BioseqPtr bsp)36276 static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
36277 {
36278   ValNodePtr feat_list = NULL;
36279   SeqFeatPtr sfp, cds;
36280   SeqMgrFeatContext fcontext;
36281   Int4              seqfeattype;
36282 
36283   if (bsp == NULL || !ISA_aa (bsp->mol))
36284   {
36285     return NULL;
36286   }
36287 
36288   seqfeattype = FindFeatFromFeatDefType (featdef);
36289   if (seqfeattype == SEQFEAT_PROT)
36290   {
36291     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
36292          sfp != NULL;
36293          sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
36294     {
36295       ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
36296     }
36297   }
36298   else
36299   {
36300     cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
36301     if (cds != NULL)
36302     {
36303       sfp = NULL;
36304       if (featdef == FEATDEF_CDS)
36305       {
36306         sfp = cds;
36307       }
36308       else if (featdef == FEATDEF_GENE)
36309       {
36310         sfp = GetGeneForFeature (cds);
36311       }
36312       else if (featdef == FEATDEF_mRNA)
36313       {
36314         sfp = GetmRNAforCDS (cds);
36315       }
36316       if (sfp != NULL)
36317       {
36318         ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
36319       }
36320     }
36321   }
36322   return feat_list;
36323 }
36324 
36325 
GetFeatureListForNucleotideBioseq(Uint1 featdef,BioseqPtr bsp)36326 static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp)
36327 {
36328   ValNodePtr feat_list = NULL;
36329   SeqFeatPtr sfp;
36330   SeqMgrFeatContext fcontext;
36331   Int4              seqfeattype;
36332   BioseqPtr         prot_bsp;
36333 
36334   if (bsp == NULL || ISA_aa (bsp->mol))
36335   {
36336     return NULL;
36337   }
36338 
36339   seqfeattype = FindFeatFromFeatDefType (featdef);
36340   if (seqfeattype == SEQFEAT_PROT)
36341   {
36342     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &fcontext);
36343          sfp != NULL;
36344          sfp = SeqMgrGetNextFeature (bsp, sfp, 0, FEATDEF_CDS, &fcontext))
36345     {
36346       prot_bsp = BioseqFindFromSeqLoc (sfp->product);
36347       ValNodeLink (&feat_list, GetFeatureListForProteinBioseq (featdef, prot_bsp));
36348     }
36349   }
36350   else
36351   {
36352     for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
36353          sfp != NULL;
36354          sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
36355     {
36356       ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
36357     }
36358   }
36359   return feat_list;
36360 }
36361 
36362 
GetFeaturesForGene(SeqFeatPtr gene,Uint1 featdef)36363 static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef)
36364 {
36365   BioseqPtr bsp;
36366   SeqFeatPtr sfp;
36367   ValNodePtr feat_list = NULL;
36368   SeqMgrFeatContext fcontext;
36369   Int4              start, stop, swap;
36370 
36371   if (gene == NULL) return NULL;
36372 
36373   bsp = BioseqFindFromSeqLoc (gene->location);
36374   start = SeqLocStart (gene->location);
36375   stop = SeqLocStop (gene->location);
36376   if (stop < start)
36377   {
36378     swap = start;
36379     start = stop;
36380     stop = swap;
36381   }
36382   for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
36383        sfp != NULL && fcontext.left < stop;
36384        sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
36385   {
36386     if (sfp != gene && fcontext.right >= start && gene == GetGeneForFeature (sfp))
36387     {
36388       ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
36389     }
36390   }
36391   return feat_list;
36392 }
36393 
36394 
GetFeatureListForGene(Uint1 featdef,SeqFeatPtr gene)36395 static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene)
36396 {
36397   ValNodePtr feat_list = NULL, cds_list, vnp;
36398   SeqFeatPtr sfp, cds;
36399   SeqMgrFeatContext fcontext;
36400   BioseqPtr         protbsp;
36401 
36402   if (gene == NULL)
36403   {
36404     return NULL;
36405   }
36406 
36407   if (featdef == FEATDEF_GENE)
36408   {
36409     ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene);
36410   }
36411   else if (FindFeatFromFeatDefType (featdef == SEQFEAT_PROT))
36412   {
36413     cds_list = GetFeaturesForGene (gene, FEATDEF_CDS);
36414     for (vnp = cds_list; vnp != NULL; vnp = vnp->next)
36415     {
36416       cds = vnp->data.ptrvalue;
36417       if (cds != NULL)
36418       {
36419         protbsp = BioseqFindFromSeqLoc (cds->product);
36420         for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext);
36421              sfp != NULL;
36422              sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext))
36423         {
36424           ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
36425         }
36426       }
36427     }
36428     cds_list = ValNodeFree (cds_list);
36429   }
36430   else
36431   {
36432     feat_list = GetFeaturesForGene (gene, featdef);
36433   }
36434 
36435   return feat_list;
36436 }
36437 
36438 
AddFeaturesFromBioseqSet(BioseqSetPtr bssp,Uint1 featdef)36439 static ValNodePtr AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef)
36440 {
36441   SeqEntryPtr sep;
36442   BioseqPtr   bsp;
36443   Int4        seqfeattype;
36444   ValNodePtr  item_list = NULL;
36445 
36446   if (bssp == NULL) return NULL;
36447 
36448   seqfeattype = FindFeatFromFeatDefType (featdef);
36449   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
36450     if (sep->data.ptrvalue == NULL) continue;
36451     if (IS_Bioseq (sep)) {
36452       bsp = sep->data.ptrvalue;
36453       if (seqfeattype == SEQFEAT_PROT) {
36454         if (ISA_aa (bsp->mol)) {
36455           ValNodeLink (&item_list, GetFeatureListForProteinBioseq (featdef, bsp));
36456         }
36457       } else if (!ISA_aa (bsp->mol)) {
36458         ValNodeLink (&item_list, GetFeatureListForNucleotideBioseq (featdef, bsp));
36459       }
36460     } else if (IS_Bioseq_set (sep)) {
36461       ValNodeLink (&item_list, AddFeaturesFromBioseqSet (sep->data.ptrvalue, featdef));
36462     }
36463   }
36464   return item_list;
36465 }
36466 
36467 
/* Given a list of BioSource objects (features with choice OBJ_SEQFEAT or
 * descriptors with choice OBJ_SEQDESC), returns the features of the type
 * named by field that go with them:
 *   - for a feature, those on the Bioseq of the feature's location;
 *   - for an extended descriptor, those on its parent Bioseq or anywhere
 *     in its parent Bioseq-set.
 * Returns NULL when item_list or field is NULL.
 */
static ValNodePtr GetFeatureListForBioSourceObjects (ValNodePtr item_list, FeatureFieldPtr field)
{
  ValNodePtr    node;
  SeqFeatPtr    src_feat;
  SeqDescrPtr   src_desc;
  BioseqPtr     target_bsp;
  ObjValNodePtr ext;
  ValNodePtr    results = NULL;

  if (item_list == NULL || field == NULL) {
    return NULL;
  }

  for (node = item_list; node != NULL; node = node->next) {
    if (node->choice == OBJ_SEQFEAT) {
      src_feat = node->data.ptrvalue;
      if (src_feat == NULL) {
        continue;
      }
      target_bsp = BioseqFindFromSeqLoc (src_feat->location);
      ValNodeLink (&results, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), target_bsp));
      continue;
    }

    if (node->choice != OBJ_SEQDESC) {
      continue;
    }
    src_desc = node->data.ptrvalue;
    if (src_desc == NULL || src_desc->extended == 0) {
      continue;
    }
    /* extended descriptors carry parent information */
    ext = (ObjValNodePtr) src_desc;
    if (ext->idx.parenttype == OBJ_BIOSEQSET) {
      ValNodeLink (&results, AddFeaturesFromBioseqSet (ext->idx.parentptr, GetFeatdefFromFeatureType(field->type)));
    } else if (ext->idx.parenttype == OBJ_BIOSEQ) {
      target_bsp = (BioseqPtr) ext->idx.parentptr;
      ValNodeLink (&results, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), target_bsp));
    }
  }
  return results;
}
36501 
36502 
ValNodeCopyPtr(ValNodePtr orig)36503 NLM_EXTERN ValNodePtr ValNodeCopyPtr (ValNodePtr orig)
36504 {
36505   ValNodePtr new_list = NULL, last_vnp = NULL, vnp;
36506 
36507   while (orig != NULL) {
36508     vnp = ValNodeNew (NULL);
36509     vnp->choice = orig->choice;
36510     vnp->data.ptrvalue = orig->data.ptrvalue;
36511     if (last_vnp == NULL) {
36512       new_list = vnp;
36513     } else {
36514       last_vnp->next = vnp;
36515     }
36516     last_vnp = vnp;
36517     orig = orig->next;
36518   }
36519   return new_list;
36520 }
36521 
36522 
/* Translates the objects matched for a table row (match_list, as produced
 * for match_type) into the list of features of the type named by field.
 * Feature-ID and dbxref matches are already features and are copied as-is;
 * the other match types are expanded through the gene, protein-Bioseq,
 * nucleotide-Bioseq or BioSource helpers.
 * Returns NULL when any argument is NULL.
 */
static ValNodePtr GetFeatureListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr results = NULL;
  ValNodePtr node;

  if (match_list == NULL || field == NULL || match_type == NULL) {
    return NULL;
  }

  switch (match_type->choice) {
    case eTableMatchFeatureID:
    case eTableMatchDbxref:
      results = ValNodeCopyPtr (match_list);
      break;

    case eTableMatchGeneLocusTag:
      node = match_list;
      while (node != NULL) {
        ValNodeLink (&results, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), node->data.ptrvalue));
        node = node->next;
      }
      break;

    case eTableMatchProteinName:    /* J. Chen */
    case eTableMatchProteinID:
      node = match_list;
      while (node != NULL) {
        ValNodeLink (&results, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), node->data.ptrvalue));
        node = node->next;
      }
      break;

    case eTableMatchNucID:
    case eTableMatchAny:
    case eTableMatchBioProject:
      node = match_list;
      while (node != NULL) {
        ValNodeLink (&results, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), node->data.ptrvalue));
        node = node->next;
      }
      break;

    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      ValNodeLink (&results, GetFeatureListForBioSourceObjects (match_list, field));
      break;

    default:
      break;
  }
  return results;
}
36561 
36562 
/* Appends every source descriptor that applies to bsp to *feature_list
 * (choice OBJ_SEQDESC).  When a non-protein Bioseq has none, a new source
 * descriptor holding an empty BioSource is created on the best top parent
 * entry and appended instead.  Does nothing when bsp or feature_list is NULL.
 *
 * The new descriptor is only filled in and listed when CreateNewDescriptor
 * actually returned one; its result used to be dereferenced unchecked.
 */
static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqDescrPtr sdp;
  SeqMgrDescContext context;
  Boolean any = FALSE;
  SeqEntryPtr sep;

  if (bsp == NULL || feature_list == NULL) return;
  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
        sdp != NULL;
        sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_source, &context)) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
    any = TRUE;
  }
  if (!any && !ISA_aa (bsp->mol)) {
    sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
    sdp = CreateNewDescriptor (sep, Seq_descr_source);
    if (sdp != NULL) {
      sdp->data.ptrvalue = BioSourceNew ();
      ValNodeAddPointer (feature_list, OBJ_SEQDESC, sdp);
    }
  }
}
36584 
/* Appends the BioSource object(s) that go with a feature to *feature_list:
 * the feature itself when it is a BioSource feature, otherwise the sources
 * of the Bioseq on which the feature is located.
 * Does nothing when sfp or feature_list is NULL.
 */
static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list)
{
  BioseqPtr located_on;

  if (sfp == NULL || feature_list == NULL) {
    return;
  }

  if (sfp->data.choice != SEQFEAT_BIOSRC) {
    located_on = BioseqFindFromSeqLoc (sfp->location);
    AddBioSourcesForBioseq (located_on, feature_list);
    return;
  }
  ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
}
36598 
36599 
/* Translates the objects matched for a table row (match_list, as produced
 * for match_type) into the list of BioSource objects they relate to.
 * Feature-based matches go through AddBioSourcesForFeature, Bioseq-based
 * matches through AddBioSourcesForBioseq, and BioSource matches are copied
 * as-is.  Returns NULL when any argument is NULL.
 */
static ValNodePtr GetBioSourceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr results = NULL;
  ValNodePtr node;

  if (match_list == NULL || field == NULL || match_type == NULL) {
    return NULL;
  }

  switch (match_type->choice) {
    /* match list holds features */
    case eTableMatchFeatureID:
    case eTableMatchGeneLocusTag:
    case eTableMatchDbxref:
      node = match_list;
      while (node != NULL) {
        if (node->choice == OBJ_SEQFEAT && node->data.ptrvalue != NULL) {
          AddBioSourcesForFeature (node->data.ptrvalue, &results);
        }
        node = node->next;
      }
      break;

    /* match list holds Bioseqs */
    case eTableMatchProteinName:    /* J. Chen */
    case eTableMatchProteinID:
    case eTableMatchNucID:
    case eTableMatchAny:
    case eTableMatchBioProject:
      node = match_list;
      while (node != NULL) {
        if (node->choice == OBJ_BIOSEQ) {
          AddBioSourcesForBioseq (node->data.ptrvalue, &results);
        }
        node = node->next;
      }
      break;

    /* match list already holds the BioSource objects */
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      results = ValNodeCopyPtr (match_list);
      break;

    default:
      break;
  }
  return results;
}
36646 
36647 
PropagateThisDescriptor(SeqDescPtr sdp,Pointer extradata)36648 Boolean PropagateThisDescriptor (SeqDescPtr sdp, Pointer extradata)
36649 {
36650   if (sdp == (SeqDescPtr) extradata) {
36651     return TRUE;
36652   } else {
36653     return FALSE;
36654   }
36655 }
36656 
36657 
/* Finds the publication descriptors that apply to bsp but are attached to
 * a Bioseq-set whose class is not nuc-prot, and propagates each of them
 * through the top-level entry with PropagateSomeDescriptors.  When at
 * least one was found, marked objects are deleted, the entity is flagged
 * dirty and an update message is sent.  Does nothing when bsp is NULL.
 */
static void PrePropagatePubs (BioseqPtr bsp)
{
  ValNodePtr        to_propagate = NULL, node;
  SeqEntryPtr       top_sep;
  SeqDescPtr        desc;
  SeqMgrDescContext desc_context;
  ObjValNodePtr     ext;
  BioseqSetPtr      owner_set;

  if (bsp == NULL) {
    return;
  }

  desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &desc_context);
  while (desc != NULL) {
    if (desc->extended) {
      ext = (ObjValNodePtr) desc;
      if (ext->idx.parenttype == OBJ_BIOSEQSET) {
        owner_set = (BioseqSetPtr) ext->idx.parentptr;
        if (owner_set != NULL && owner_set->_class != BioseqseqSet_class_nuc_prot) {
          ValNodeAddPointer (&to_propagate, OBJ_SEQDESC, desc);
        }
      }
    }
    desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_pub, &desc_context);
  }

  if (to_propagate == NULL) {
    return;
  }

  top_sep = GetTopSeqEntryForEntityID (bsp->idx.entityID);
  for (node = to_propagate; node != NULL; node = node->next) {
    PropagateSomeDescriptors (top_sep, PropagateThisDescriptor, node->data.ptrvalue);
  }
  DeleteMarkedObjects (bsp->idx.entityID, 0, NULL);
  ObjMgrSetDirtyFlag (bsp->idx.entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bsp->idx.entityID, 0, 0);
  to_propagate = ValNodeFree (to_propagate);
}
36691 
36692 
/* Adds every publication found for bsp to *feature_list.
 *
 * PrePropagatePubs is run on bsp first.  Publication descriptors are then
 * added as OBJ_SEQDESC nodes, followed by publication features as
 * OBJ_SEQFEAT nodes.
 *
 * bsp           Bioseq to inspect; NULL is a no-op
 * feature_list  list that receives the nodes; NULL is a no-op
 */
static void AddPubsForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqMgrDescContext desc_ctx;
  SeqMgrFeatContext feat_ctx;
  SeqDescrPtr       pub_desc;
  SeqFeatPtr        pub_feat;

  if (bsp == NULL || feature_list == NULL) {
    return;
  }

  /* pre-propagate publication descriptors */
  PrePropagatePubs (bsp);

  pub_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &desc_ctx);
  while (pub_desc != NULL) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, pub_desc);
    pub_desc = SeqMgrGetNextDescriptor (bsp, pub_desc, Seq_descr_pub, &desc_ctx);
  }

  pub_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &feat_ctx);
  while (pub_feat != NULL) {
    ValNodeAddPointer (feature_list, OBJ_SEQFEAT, pub_feat);
    pub_feat = SeqMgrGetNextFeature (bsp, pub_feat, SEQFEAT_PUB, 0, &feat_ctx);
  }
}
36716 
36717 
AddPubListFromBioseqSet(BioseqSetPtr bssp)36718 static ValNodePtr AddPubListFromBioseqSet (BioseqSetPtr bssp)
36719 {
36720   SeqEntryPtr sep;
36721   BioseqPtr   bsp;
36722   ValNodePtr  item_list = NULL;
36723 
36724   if (bssp == NULL) return NULL;
36725 
36726   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
36727     if (sep->data.ptrvalue == NULL) continue;
36728     if (IS_Bioseq (sep)) {
36729       bsp = sep->data.ptrvalue;
36730       if (!ISA_aa (bsp->mol)) {
36731         AddPubsForBioseq (bsp, &item_list);
36732       }
36733     } else if (IS_Bioseq_set (sep)) {
36734       ValNodeLink (&item_list, AddPubListFromBioseqSet (sep->data.ptrvalue));
36735     }
36736   }
36737   return item_list;
36738 }
36739 
36740 
/* Collects the publications related to a list of matched source objects.
 *
 * OBJ_SEQFEAT nodes: publications of the Bioseq found from the feature's
 * location.
 * OBJ_SEQDESC nodes (extended descriptors only): publications of the
 * parent Bioseq, or of the members of the parent Bioseq-set.
 * Other node types are ignored.
 *
 * item_list  matched objects; NULL yields NULL
 *
 * Returns the newly built list of publication nodes.
 */
static ValNodePtr GetPubListForBioSourceObjects (ValNodePtr item_list)
{
  ValNodePtr    pubs = NULL;
  ValNodePtr    item;
  SeqFeatPtr    feat;
  SeqDescrPtr   desc;
  ObjValNodePtr obj_node;

  if (item_list == NULL) {
    return NULL;
  }

  for (item = item_list; item != NULL; item = item->next) {
    switch (item->choice) {
      case OBJ_SEQFEAT:
        feat = item->data.ptrvalue;
        if (feat != NULL) {
          AddPubsForBioseq (BioseqFindFromSeqLoc (feat->location), &pubs);
        }
        break;
      case OBJ_SEQDESC:
        desc = item->data.ptrvalue;
        if (desc == NULL || desc->extended == 0) {
          break;
        }
        obj_node = (ObjValNodePtr) desc;
        if (obj_node->idx.parenttype == OBJ_BIOSEQSET) {
          ValNodeLink (&pubs, AddPubListFromBioseqSet (obj_node->idx.parentptr));
        } else if (obj_node->idx.parenttype == OBJ_BIOSEQ) {
          AddPubsForBioseq ((BioseqPtr) obj_node->idx.parentptr, &pubs);
        }
        break;
      default:
        break;
    }
  }
  return pubs;
}
36774 
36775 
/* Produces the list of publications that a table row applies to.
 *
 * How match_list is interpreted depends on match_type->choice:
 *   - feature ID, gene locus tag, dbxref: OBJ_SEQFEAT nodes; publications
 *     come from the Bioseq found from each feature's location
 *   - protein name/ID, nucleotide ID, any, BioProject: OBJ_BIOSEQ nodes;
 *     publications come from each Bioseq
 *   - BioSource, source qualifier: delegated to
 *     GetPubListForBioSourceObjects
 *
 * match_type  kind of match that produced match_list; NULL yields NULL
 * match_list  matched objects for the row
 *
 * Returns the newly built list of publication nodes.
 */
static ValNodePtr GetPubListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
{
  ValNodePtr pubs = NULL;
  ValNodePtr item;
  SeqFeatPtr feat;

  if (match_type == NULL) {
    return NULL;
  }

  switch (match_type->choice) {
    case eTableMatchFeatureID:
    case eTableMatchGeneLocusTag:
    case eTableMatchDbxref:
      /* matches are features */
      item = match_list;
      while (item != NULL) {
        if (item->choice == OBJ_SEQFEAT && item->data.ptrvalue != NULL) {
          feat = (SeqFeatPtr) item->data.ptrvalue;
          AddPubsForBioseq (BioseqFindFromSeqLoc (feat->location), &pubs);
        }
        item = item->next;
      }
      break;
    case eTableMatchProteinName:  /* J. Chen */
    case eTableMatchProteinID:
    case eTableMatchNucID:
    case eTableMatchAny:
    case eTableMatchBioProject:
      /* matches are sequences */
      item = match_list;
      while (item != NULL) {
        if (item->choice == OBJ_BIOSEQ) {
          AddPubsForBioseq (item->data.ptrvalue, &pubs);
        }
        item = item->next;
      }
      break;
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      pubs = GetPubListForBioSourceObjects (match_list);
      break;
    default:
      break;
  }
  return pubs;
}
36827 
36828 
/* Builds the list of sequences associated with a list of matched source
 * objects.
 *
 * OBJ_SEQFEAT nodes: the Bioseq found from the feature's location, if any.
 * OBJ_SEQDESC nodes (extended descriptors only): the parent Bioseq, or the
 * result of CollectNucBioseqs on the SeqEntry of the parent Bioseq-set.
 * Other node types are ignored.
 *
 * item_list  matched objects; NULL yields NULL
 *
 * Returns a newly built list; directly added nodes have choice OBJ_BIOSEQ.
 * The same sequence can appear more than once.
 */
static ValNodePtr GetSequenceListForBioSourceObjects (ValNodePtr item_list)
{
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  SeqDescrPtr sdp;
  BioseqPtr   bsp;
  ObjValNodePtr ovp;
  ValNodePtr  seq_list = NULL;
  SeqEntryPtr sep;

  if (item_list == NULL) return NULL;

  for (vnp = item_list; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == OBJ_SEQFEAT) {
      sfp = vnp->data.ptrvalue;
      if (sfp != NULL) {
        bsp = BioseqFindFromSeqLoc (sfp->location);
        if (bsp != NULL) {
          ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp);
        }
      }
    } else if (vnp->choice == OBJ_SEQDESC) {
      sdp = vnp->data.ptrvalue;
      if (sdp != NULL && sdp->extended != 0) {
        ovp = (ObjValNodePtr) sdp;
        if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
          sep = SeqMgrGetSeqEntryForData (ovp->idx.parentptr);
          /* Append rather than assign: assigning would discard (and leak)
           * the sequences already gathered from earlier items. */
          ValNodeLink (&seq_list, CollectNucBioseqs (sep));
        } else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
          bsp = (BioseqPtr) ovp->idx.parentptr;
          if (bsp != NULL) {
            ValNodeAddPointer (&seq_list, OBJ_BIOSEQ, bsp);
          }
        }
      }
    }
  }
  return seq_list;
}
36869 
36870 
/* Produces the list of sequences that a table row applies to.
 *
 * How match_list is interpreted depends on match_type->choice:
 *   - feature ID, gene locus tag, dbxref: OBJ_SEQFEAT nodes; each yields
 *     the Bioseq found from the feature's location (skipped when none)
 *   - protein name/ID, nucleotide ID, BioProject: OBJ_BIOSEQ nodes are
 *     copied into the result
 *   - BioSource, source qualifier: delegated to
 *     GetSequenceListForBioSourceObjects
 *   - any: the result is ValNodeCopyPtr of match_list
 *
 * match_type  kind of match that produced match_list; NULL yields NULL
 * match_list  matched objects for the row
 *
 * Returns a newly built list that the caller releases with ValNodeFree.
 */
static ValNodePtr GetSequenceListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
{
  ValNodePtr sequences = NULL;
  ValNodePtr item;
  SeqFeatPtr feat;
  BioseqPtr  seq;

  if (match_type == NULL) {
    return NULL;
  }

  switch (match_type->choice) {
    case eTableMatchFeatureID:
    case eTableMatchGeneLocusTag:
    case eTableMatchDbxref:
      /* matches are features: map each one to its sequence */
      item = match_list;
      while (item != NULL) {
        if (item->choice == OBJ_SEQFEAT && item->data.ptrvalue != NULL) {
          feat = (SeqFeatPtr) item->data.ptrvalue;
          seq = BioseqFindFromSeqLoc (feat->location);
          if (seq != NULL) {
            ValNodeAddPointer (&sequences, OBJ_BIOSEQ, seq);
          }
        }
        item = item->next;
      }
      break;
    case eTableMatchProteinName:  /* J. Chen */
    case eTableMatchProteinID:
    case eTableMatchNucID:
    case eTableMatchBioProject:
      /* matches are already sequences */
      item = match_list;
      while (item != NULL) {
        if (item->choice == OBJ_BIOSEQ) {
          ValNodeAddPointer (&sequences, OBJ_BIOSEQ, item->data.ptrvalue);
        }
        item = item->next;
      }
      break;
    case eTableMatchBioSource:
    case eTableMatchSourceQual:
      sequences = GetSequenceListForBioSourceObjects (match_list);
      break;
    case eTableMatchAny:
      sequences = ValNodeCopyPtr (match_list);
      break;
    default:
      break;
  }
  return sequences;
}
36934 
36935 
/* Produces the structured-comment descriptors that a table row applies to.
 *
 * The row is first resolved to sequences with
 * GetSequenceListForRowAndColumn; every user descriptor on those sequences
 * that IsUserObjectStructuredComment accepts is added as an OBJ_SEQDESC
 * node.
 *
 * match_type  kind of match that produced match_list
 * match_list  matched objects for the row
 *
 * Returns the newly built list of descriptor nodes.
 */
static ValNodePtr GetStructuredCommentListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list)
{
  SeqMgrDescContext desc_ctx;
  ValNodePtr        sequences;
  ValNodePtr        comments = NULL;
  ValNodePtr        item;
  SeqDescrPtr       user_desc;

  sequences = GetSequenceListForRowAndColumn (match_type, match_list);

  for (item = sequences; item != NULL; item = item->next) {
    if (item->choice != OBJ_BIOSEQ) {
      continue;
    }
    user_desc = SeqMgrGetNextDescriptor (item->data.ptrvalue, NULL, Seq_descr_user, &desc_ctx);
    while (user_desc != NULL) {
      if (IsUserObjectStructuredComment (user_desc->data.ptrvalue)) {
        ValNodeAddPointer (&comments, OBJ_SEQDESC, user_desc);
      }
      user_desc = SeqMgrGetNextDescriptor (item->data.ptrvalue, user_desc, Seq_descr_user, &desc_ctx);
    }
  }

  /* the intermediate sequence list is no longer needed */
  sequences = ValNodeFree (sequences);
  return comments;
}
36958 
36959 
/* Produces the objects that one table cell should be applied to.
 *
 * The kind of object gathered depends on field->choice (BioSources,
 * features, publications, structured comments, DBLink / comment / defline /
 * GenBank-block descriptor destinations, or sequences).  The gathered list
 * is then filtered by constraint, sorted and de-duplicated.
 *
 * match_type  kind of match that produced match_list; NULL yields NULL
 * match_list  matched objects for the row
 * field       field the column refers to; NULL yields NULL
 * constraint  constraint set each target must satisfy
 *
 * Returns the newly built, de-duplicated target list.
 */
static ValNodePtr GetTargetListForRowAndColumn (MatchTypePtr match_type, ValNodePtr match_list, FieldTypePtr field, ValNodePtr constraint)
{
  ValNodePtr      targets = NULL;
  ValNodePtr      sequences;
  ValNodePtr      item;
  ValNodePtr PNTR link;
  FeatureFieldPtr feature_field;
  Int4            misc_kind;

  if (field == NULL || match_type == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case FieldType_source_qual:
      targets = GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
      break;
    case FieldType_feature_field:
      targets = GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
      break;
    case FieldType_cds_gene_prot:
      /* translate to a temporary feature field, then treat as a feature */
      feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      targets = GetFeatureListForRowAndColumn (match_type, match_list, feature_field);
      feature_field = FeatureFieldFree (feature_field);
      break;
    case FieldType_rna_field:
      feature_field = FeatureFieldFromRnaQual (field->data.ptrvalue);
      targets = GetFeatureListForRowAndColumn (match_type, match_list, feature_field);
      feature_field = FeatureFieldFree (feature_field);
      break;
    case FieldType_pub:
      targets = GetPubListForRowAndColumn (match_type, match_list);
      break;
    case FieldType_struc_comment_field:
      targets = GetStructuredCommentListForRowAndColumn (match_type, match_list);
      break;
    case FieldType_dblink:
      sequences = GetSequenceListForRowAndColumn (match_type, match_list);
      for (item = sequences; item != NULL; item = item->next) {
        AddDBLinkDescriptorDestinationsForBioseq (item->data.ptrvalue, &targets);
      }
      sequences = ValNodeFree (sequences);
      break;
    case FieldType_molinfo_field:
      targets = GetSequenceListForRowAndColumn (match_type, match_list);
      break;
    case FieldType_misc:
      misc_kind = field->data.intvalue;
      if (misc_kind == Misc_field_genome_project_id) {
        /* the sequences themselves are the targets */
        targets = GetSequenceListForRowAndColumn (match_type, match_list);
      } else if (misc_kind == Misc_field_comment_descriptor
                 || misc_kind == Misc_field_defline
                 || misc_kind == Misc_field_keyword) {
        /* descriptor destinations on each sequence are the targets */
        sequences = GetSequenceListForRowAndColumn (match_type, match_list);
        for (item = sequences; item != NULL; item = item->next) {
          if (misc_kind == Misc_field_comment_descriptor) {
            AddCommentDescriptorDestinationsForBioseq (item->data.ptrvalue, &targets);
          } else if (misc_kind == Misc_field_defline) {
            AddDeflineDescriptorDestinationsForBioseq (item->data.ptrvalue, &targets);
          } else {
            AddGenbankBlockDescriptorDestinationsForBioseq (item->data.ptrvalue, &targets);
          }
        }
        sequences = ValNodeFree (sequences);
      }
      break;
    default:
      break;
  }

  /* remove targets that do not match constraint; link always addresses the
   * pointer that refers to the node under inspection */
  link = &targets;
  while (*link != NULL) {
    item = *link;
    if (DoesObjectMatchConstraintChoiceSet (item->choice, item->data.ptrvalue, constraint)) {
      link = &(item->next);
    } else {
      *link = item->next;
      item->next = NULL;
      item = ValNodeFree (item);
    }
  }

  /* remove targets found twice */
  targets = ValNodeSort (targets, SortVnpByChoiceAndPtrvalue);
  ValNodeUnique (&targets, SortVnpByChoiceAndPtrvalue, ValNodeFree);

  return targets;
}
37048 
37049 
/* Appends a "nothing found" message for one table cell to err_list.
 *
 * The wording depends on ft->choice: source qualifiers report a missing
 * biosource, structured comment fields a missing structured comment, misc
 * fields a missing sequence, and feature / CDS-gene-prot / RNA fields a
 * missing feature of the named type.  Other field types, and feature-like
 * fields without usable data, add nothing.
 *
 * err_list   block that receives the allocated message; NULL is a no-op
 * ft         field the column refers to; NULL is a no-op
 * match_val  match text of the row; NULL is a no-op
 * col_num    column number included in the message
 * line_num   line number included in the message
 */
static void ReportMissingTargets (ValNodeBlockPtr err_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num)
{
  CharPtr         feature_fmt = "No %s feature for %s (column %d, line %d)";
  CharPtr         simple_fmt = NULL;      /* format without a feature name */
  CharPtr         feat_name = NULL;
  CharPtr         msg;
  Boolean         have_feature = FALSE;   /* TRUE when feature_fmt applies */
  Boolean         release_field = FALSE;  /* TRUE when field was created here */
  FeatureFieldPtr field = NULL;
  RnaQualPtr      rq;

  if (err_list == NULL || ft == NULL || match_val == NULL) {
    return;
  }

  /* first decide which message applies ... */
  switch (ft->choice) {
    case FieldType_source_qual:
      simple_fmt = "No biosource for %s (column %d, line %d)";
      break;
    case FieldType_struc_comment_field:
      simple_fmt = "No structured comment for %s (column %d, line %d)";
      break;
    case FieldType_misc:
      simple_fmt = "No sequence for %s (column %d, line %d)";
      break;
    case FieldType_feature_field:
      field = (FeatureFieldPtr) ft->data.ptrvalue;
      if (field != NULL) {
        feat_name = GetFeatureNameFromFeatureType (field->type);
        have_feature = TRUE;
      }
      break;
    case FieldType_cds_gene_prot:
      field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue);
      release_field = TRUE;
      if (field != NULL) {
        feat_name = GetFeatureNameFromFeatureType (field->type);
        have_feature = TRUE;
      }
      break;
    case FieldType_rna_field:
      rq = (RnaQualPtr) ft->data.ptrvalue;
      if (rq != NULL) {
        feat_name = SummarizeRnaType (rq->type);
        have_feature = TRUE;
      }
      break;
    default:
      break;
  }

  /* ... then format it; the extra 30 characters cover the two numbers */
  if (have_feature) {
    msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (feature_fmt)
                                             + StringLen (feat_name)
                                             + StringLen (match_val)
                                             + 30));
    sprintf (msg, feature_fmt, feat_name, match_val, col_num, line_num);
    ValNodeAddPointerToEnd (err_list, 0, msg);
  } else if (simple_fmt != NULL) {
    msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (simple_fmt)
                                             + StringLen (match_val)
                                             + 30));
    sprintf (msg, simple_fmt, match_val, col_num, line_num);
    ValNodeAddPointerToEnd (err_list, 0, msg);
  }

  if (release_field) {
    field = FeatureFieldFree (field);
  }
}
37122 
37123 
/* Appends a "No ID for line N" message to vnb.
 *
 * vnb       block that receives the allocated message
 * line_num  line number included in the message
 */
static void ReportEmptyIDColumn (ValNodeBlockPtr vnb, Int4 line_num)
{
  CharPtr fmt = "No ID for line %d";
  CharPtr msg;

  /* 15 extra characters leave room for the formatted line number */
  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 15));
  sprintf (msg, fmt, line_num);
  ValNodeAddPointerToEnd (vnb, 0, msg);
}
37133 
/* Walks a row's values and the column configurations in parallel and
 * returns the value node for the first column whose configuration has a
 * match_type.
 *
 * val_vnp  first value of the row
 * col_vnp  first column configuration (TabColumnConfigPtr in ptrvalue)
 *
 * Returns the matching value node, or NULL if either list runs out first.
 */
static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp)
{
  TabColumnConfigPtr config;

  for (; val_vnp != NULL && col_vnp != NULL;
       val_vnp = val_vnp->next, col_vnp = col_vnp->next) {
    config = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
    if (config != NULL && config->match_type != NULL) {
      return val_vnp;
    }
  }
  return NULL;
}
37148 
37149 
GetmRNAForFeature(SeqFeatPtr sfp)37150 NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
37151 {
37152   BioseqPtr         pbsp;
37153 
37154   if (sfp == NULL) return NULL;
37155   if (sfp->data.choice == SEQFEAT_PROT)
37156   {
37157     pbsp = BioseqFindFromSeqLoc (sfp->location);
37158     sfp = SeqMgrGetCDSgivenProduct (pbsp, NULL);
37159     if (sfp == NULL) return NULL;
37160   }
37161   return GetmRNAforCDS (sfp);
37162 }
37163 
37164 
AdjustmRNAProductToMatchProteinProduct(SeqFeatPtr sfp)37165 NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp)
37166 {
37167   SeqFeatPtr mrna;
37168   ProtRefPtr prp;
37169   RnaRefPtr  rrp;
37170 
37171   if (sfp == NULL) {
37172     return FALSE;
37173   }
37174 
37175   if (sfp->data.choice == SEQFEAT_PROT) {
37176     prp = (ProtRefPtr) sfp->data.value.ptrvalue;
37177   } else if (sfp->data.choice == SEQFEAT_CDREGION) {
37178     prp = GetProtRefForFeature(sfp);
37179   } else {
37180     return FALSE;
37181   }
37182   mrna = GetmRNAForFeature (sfp);
37183 
37184   if (mrna == NULL) return FALSE;
37185 
37186   rrp = (RnaRefPtr) mrna->data.value.ptrvalue;
37187   if (rrp == NULL)
37188   {
37189     rrp = RnaRefNew();
37190     mrna->data.value.ptrvalue = rrp;
37191   }
37192 
37193   rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
37194   if (prp == NULL || prp->name == NULL || StringHasNoText (prp->name->data.ptrvalue))
37195   {
37196     rrp->ext.choice = 0;
37197   }
37198   else
37199   {
37200     rrp->ext.choice = 1;
37201     rrp->ext.value.ptrvalue = StringSave (prp->name->data.ptrvalue);
37202   }
37203   return TRUE;
37204 }
37205 
37206 
IsFieldTypeCDSProduct(FieldTypePtr ft)37207 NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
37208 {
37209   FeatureFieldPtr field;
37210   Boolean         rval = FALSE;
37211 
37212   if (ft == NULL) return FALSE;
37213   if (ft->choice == FieldType_feature_field) {
37214     field = (FeatureFieldPtr) ft->data.ptrvalue;
37215     if (field != NULL && field->field != NULL && field->field->choice == FeatQualChoice_legal_qual) {
37216       if (field->type == Macro_feature_type_cds
37217           && field->field->data.intvalue == Feat_qual_legal_product) {
37218         rval = TRUE;
37219       } else if (field->type == Macro_feature_type_prot
37220                  && field->field->data.intvalue == Feat_qual_legal_product) {
37221         rval = TRUE;
37222       }
37223     }
37224   } else if (ft->choice == FieldType_cds_gene_prot) {
37225     if (ft->data.intvalue == CDSGeneProt_field_prot_name) {
37226       rval = TRUE;
37227     }
37228   }
37229   return rval;
37230 }
37231 
37232 
IsFieldTypeProteinDesc(FieldTypePtr ft)37233 static Boolean IsFieldTypeProteinDesc (FieldTypePtr ft)
37234 {
37235   FeatureFieldPtr field;
37236   Boolean         rval = FALSE;
37237 
37238   if (ft == NULL) return FALSE;
37239   if (ft->choice == FieldType_feature_field) {
37240     field = (FeatureFieldPtr) ft->data.ptrvalue;
37241     if (field != NULL && (field->type == Macro_feature_type_cds || field->type == Macro_feature_type_prot)
37242         && field->field != NULL
37243         && field->field->choice == FeatQualChoice_legal_qual
37244         && field->field->data.intvalue == Feat_qual_legal_description) {
37245       rval = TRUE;
37246     }
37247   } else if (ft->choice == FieldType_cds_gene_prot) {
37248     if (ft->data.intvalue == CDSGeneProt_field_prot_description) {
37249       rval = TRUE;
37250     }
37251   }
37252   return rval;
37253 }
37254 
37255 
IsFieldTypeGeneLocusTag(FieldTypePtr ft)37256 static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft)
37257 {
37258   FeatureFieldPtr field;
37259   RnaQualPtr      rq;
37260   Boolean         rval = FALSE;
37261 
37262   if (ft == NULL) return FALSE;
37263   if (ft->choice == FieldType_feature_field) {
37264     field = (FeatureFieldPtr) ft->data.ptrvalue;
37265     if (field != NULL && field->type == Macro_feature_type_gene
37266         && field->field != NULL
37267         && field->field->choice == FeatQualChoice_legal_qual
37268         && field->field->data.intvalue == Feat_qual_legal_locus_tag) {
37269       rval = TRUE;
37270     }
37271   } else if (ft->choice == FieldType_cds_gene_prot) {
37272     if (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag) {
37273       rval = TRUE;
37274     }
37275   } else if (ft->choice == FieldType_rna_field) {
37276     rq = (RnaQualPtr) ft->data.ptrvalue;
37277     if (rq != NULL && rq->field == Rna_field_gene_locus_tag) {
37278       rval = TRUE;
37279     }
37280   }
37281 
37282   return rval;
37283 }
37284 
37285 
37286 
ValidateTabTableValues(ValNodePtr table,ValNodePtr columns)37287 NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns)
37288 {
37289   ValNodePtr err_list = NULL;
37290   ValNodePtr line_vnp, col_vnp, val_vnp;
37291   Int4       line_num, col_num;
37292   TabColumnConfigPtr t;
37293   ValNodePtr locus_tag_values = NULL, bad_locus_tags = NULL, vnp, tmp_field, sq;
37294   CharPtr    bad_format_fmt = "Locus tag %s has incorrect format";
37295   CharPtr    dup_fmt = "Locus tag %s appears in the table more than once";
37296   CharPtr    inconsistent_fmt = "Locus tag prefix for %s is inconsistent";
37297   CharPtr    bad_molinfo_fmt = "'%s' is not a valid value for this field";
37298   CharPtr    err_msg;
37299   MatchTypePtr match_type;
37300 
37301   if (table == NULL || columns == NULL) {
37302     return NULL;
37303   }
37304 
37305   match_type = FindMatchTypeInHeader (columns);
37306   if (match_type == NULL) {
37307     ValNodeAddPointer (&err_list, 0, StringSave ("No match type"));
37308     return err_list;
37309   }
37310 
37311   if (match_type->choice == eTableMatchAny && table->next != NULL) {
37312     if (table->next->next != NULL) {
37313       ValNodeAddPointer (&err_list, 0, StringSave ("Too many rows for apply to all"));
37314     } else {
37315       /* skip header */
37316       table = table->next;
37317     }
37318   }
37319 
37320   for (line_vnp = table, line_num = 1;
37321        line_vnp != NULL;
37322        line_vnp = line_vnp->next, line_num++) {
37323     for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
37324          val_vnp != NULL && col_vnp != NULL;
37325          val_vnp = val_vnp->next, col_vnp = col_vnp->next, col_num++) {
37326       t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
37327       if (t == NULL || t->match_type != NULL || val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)) {
37328         continue;
37329       }
37330       if (IsFieldTypeGeneLocusTag (t->field)) {
37331         ValNodeAddPointer (&locus_tag_values, 0, val_vnp->data.ptrvalue);
37332       } else if (t->field != NULL && t->field->choice == FieldType_molinfo_field && val_vnp->data.ptrvalue != NULL) {
37333         tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue);
37334         if (tmp_field == NULL) {
37335           err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue)));
37336           sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue);
37337           ValNodeAddPointer (&err_list, 0, err_msg);
37338         }
37339         tmp_field = MolinfoFieldFree(tmp_field);
37340       } else if (t->field != NULL && t->field->choice == FieldType_source_qual
37341                  && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL
37342                  && sq->choice == SourceQualValChoice_location) {
37343         tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue);
37344         if (tmp_field == NULL) {
37345           err_msg =(CharPtr) MemNew (sizeof (Char) * (StringLen(bad_molinfo_fmt) + StringLen (val_vnp->data.ptrvalue)));
37346           sprintf (err_msg, bad_molinfo_fmt, val_vnp->data.ptrvalue);
37347           ValNodeAddPointer (&err_list, 0, err_msg);
37348         }
37349         tmp_field = FieldTypeFree (tmp_field);
37350       }
37351     }
37352   }
37353 
37354   bad_locus_tags = FindBadLocusTagsInList (locus_tag_values);
37355   for (vnp = bad_locus_tags; vnp != NULL; vnp = vnp->next) {
37356     switch (vnp->choice) {
37357       case eLocusTagErrorBadFormat:
37358         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_format_fmt) + StringLen (vnp->data.ptrvalue)));
37359         sprintf (err_msg, bad_format_fmt, vnp->data.ptrvalue);
37360         ValNodeAddPointer (&err_list, 0, err_msg);
37361         break;
37362       case eLocusTagErrorDuplicate:
37363         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (dup_fmt) + StringLen (vnp->data.ptrvalue)));
37364         sprintf (err_msg, dup_fmt, vnp->data.ptrvalue);
37365         ValNodeAddPointer (&err_list, 0, err_msg);
37366         break;
37367       case eLocusTagErrorInconsistentPrefix:
37368         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (inconsistent_fmt) + StringLen (vnp->data.ptrvalue)));
37369         sprintf (err_msg, inconsistent_fmt, vnp->data.ptrvalue);
37370         ValNodeAddPointer (&err_list, 0, err_msg);
37371         break;
37372     }
37373   }
37374   locus_tag_values = ValNodeFree (locus_tag_values);
37375   return err_list;
37376 }
37377 
37378 
GetSequenceListsForMatchTypeInTabTable(SeqEntryPtr sep,ValNodePtr table,Int4 col,MatchTypePtr match_type,ValNodePtr PNTR p_err_list)37379 NLM_EXTERN ValNodePtr GetSequenceListsForMatchTypeInTabTable (SeqEntryPtr sep, ValNodePtr table, Int4 col, MatchTypePtr match_type, ValNodePtr PNTR p_err_list)
37380 {
37381   ValNodePtr vnp_row, vnp;
37382   ValNodePtr sequence_lists = NULL, match_list, target_list;
37383   Uint2      entityID;
37384   Int4       num, line;
37385   CharPtr    no_match_fmt = "No match for %s, line %d";
37386   CharPtr    no_match_txt_fmt = "No match text for line %d";
37387   CharPtr    msg;
37388   BioseqSearchIndexPtr  index = NULL;
37389 
37390   if (sep == NULL || table == NULL || match_type == NULL || col < 0) {
37391     return NULL;
37392   }
37393 
37394   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
37395 
37396   index = BuildIDStringsList(sep);
37397   if (match_type->choice == eTableMatchAny && table->next != NULL) {
37398     /* skip first row, must contain header */
37399     table = table->next;
37400   }
37401 
37402   for (vnp_row = table, line = 1; vnp_row != NULL; vnp_row = vnp_row->next, line++) {
37403     vnp = vnp_row->data.ptrvalue;
37404     num = 0;
37405     while (vnp != NULL && num < col) {
37406       vnp = vnp->next;
37407       num++;
37408     }
37409     if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) {
37410       ValNodeAddPointer (&sequence_lists, 0, NULL);
37411       if (p_err_list != NULL) {
37412         msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_txt_fmt) + 15));
37413         sprintf (msg, no_match_txt_fmt, line);
37414         ValNodeAddPointer (p_err_list, 0, msg);
37415       }
37416     } else {
37417       match_list = FindMatchForRowEx (match_type, vnp->data.ptrvalue, entityID, sep, index);
37418       target_list = GetSequenceListForRowAndColumn (match_type, match_list);
37419       match_list = ValNodeFree (match_list);
37420       ValNodeAddPointer (&sequence_lists, 0, target_list);
37421       if (target_list == NULL && p_err_list != NULL) {
37422         msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (vnp->data.ptrvalue) + 15));
37423         sprintf (msg, no_match_fmt, vnp->data.ptrvalue, line);
37424         ValNodeAddPointer (p_err_list, 0, msg);
37425       }
37426     }
37427   }
37428   index = BioseqSearchIndexFree (index);
37429 
37430   return sequence_lists;
37431 }
37432 
37433 
FreeSequenceLists(ValNodePtr lists)37434 NLM_EXTERN ValNodePtr FreeSequenceLists (ValNodePtr lists)
37435 {
37436   ValNodePtr vnp;
37437 
37438   for (vnp = lists; vnp != NULL; vnp = vnp->next) {
37439     vnp->data.ptrvalue = ValNodeFree (vnp->data.ptrvalue);
37440   }
37441   lists = ValNodeFree (lists);
37442   return lists;
37443 }
37444 
37445 
GetBioseqMatchesForSequenceIDs(ValNodePtr query_list,Uint1 match_location,SeqEntryPtr sep)37446 NLM_EXTERN ValNodePtr GetBioseqMatchesForSequenceIDs (ValNodePtr query_list, Uint1 match_location, SeqEntryPtr sep)
37447 {
37448   ValNodePtr response_list = NULL, vnp, single_list, vnp_t;
37449   BioseqSearchIndexPtr  index = NULL;
37450   BioseqPtr             bsp;
37451   ValNodeBlock          thisid_index;
37452   BioseqSearchItemPtr   si;
37453   Char                  num_buf[15];
37454   CharPtr               match_str;
37455 
37456   index = BuildIDStringsList(sep);
37457 
37458   for (vnp = query_list; vnp != NULL; vnp = vnp->next) {
37459     InitValNodeBlock (&thisid_index, NULL);
37460     BuildIdStringsListForIdList (vnp->data.ptrvalue, NULL, &thisid_index);
37461 
37462     bsp = NULL;
37463     for (vnp_t = thisid_index.head; vnp_t != NULL && bsp == NULL; vnp_t = vnp_t->next) {
37464       si = (BioseqSearchItemPtr) vnp_t->data.ptrvalue;
37465       if (si->num > 0) {
37466         sprintf (num_buf, "%d", si->num);
37467         match_str = num_buf;
37468       } else {
37469         match_str = si->str;
37470       }
37471       if (match_location == String_location_equals) {
37472         bsp = FindStringInIdListIndex (match_str, index);
37473       } else {
37474         single_list = FindListInIdListIndex (match_location, match_str, index);
37475         if (single_list != NULL && single_list->next == NULL) {
37476           bsp = single_list->data.ptrvalue;
37477         }
37478         single_list = ValNodeFree (single_list);
37479       }
37480     }
37481 
37482     thisid_index.head = BioseqSearchItemListFree(thisid_index.head);
37483     ValNodeAddPointer (&response_list, OBJ_BIOSEQ, bsp);
37484   }
37485 
37486   index = BioseqSearchIndexFree (index);
37487   return response_list;
37488 }
37489 
37490 
ReportTableSummaryLine(Int4 err_lines,Int4 total_lines,CharPtr fmt)37491 static ValNodePtr ReportTableSummaryLine (Int4 err_lines, Int4 total_lines, CharPtr fmt)
37492 {
37493   CharPtr str;
37494   ValNodePtr vnp;
37495 
37496   str = (CharPtr) MemNew (sizeof (Char) + (StringLen (fmt) + 30));
37497   sprintf (str, fmt, err_lines, total_lines);
37498   vnp = ValNodeNew (NULL);
37499   vnp->data.ptrvalue = str;
37500   return vnp;
37501 }
37502 
37503 
GetObjectTableForTabTable(SeqEntryPtr sep,ValNodePtr table,ValNodePtr columns,ValNodePtr PNTR p_err_list)37504 NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list)
37505 {
37506   ValNodeBlock vnb;
37507   ValNodePtr line_vnp, val_vnp, col_vnp, err_vnp;
37508   ValNodePtr obj_table = NULL, obj_row, last_obj = NULL, tmp, last = NULL;
37509   Int4       line_num = 1, col_num;
37510   Uint2      entityID;
37511   ValNodePtr match_list, match_choice, target_list;
37512   TabColumnConfigPtr t;
37513   CharPtr            err_msg;
37514   CharPtr            no_match_fmt = "No match for %s, line %d";
37515   MatchTypePtr       match_type;
37516   Int4       num_empty = 0, num_missing = 0, num_no_targets = 0;
37517   BioseqSearchIndexPtr  index = NULL;
37518 
37519   vnb.head = NULL;
37520   vnb.tail = NULL;
37521 
37522   if (sep == NULL) {
37523     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry"));
37524   }
37525   if (table == NULL) {
37526     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table"));
37527   }
37528   if (columns == NULL) {
37529     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information"));
37530   }
37531 
37532   match_type = FindMatchTypeInHeader (columns);
37533   if (match_type == NULL) {
37534     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No Match Type"));
37535   } else if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
37536     if (table->next->next != NULL) {
37537       ValNodeAddPointerToEnd (&vnb, 0, StringSave ("Too many rows for apply to all"));
37538     } else {
37539       /* skip header */
37540       table = table->next;
37541     }
37542   }
37543 
37544   if (vnb.head != NULL) {
37545     if (p_err_list == NULL) {
37546       vnb.head = ValNodeFreeData (vnb.head);
37547     } else {
37548       *p_err_list = vnb.head;
37549     }
37550     return NULL;
37551   }
37552 
37553   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
37554 
37555 
37556   index = BuildIDStringsList(sep);
37557 
37558   last = NULL;
37559   for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) {
37560     obj_row = NULL;
37561     match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns);
37562     if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) {
37563       ReportEmptyIDColumn (&vnb, line_num);
37564       num_empty++;
37565     } else {
37566       match_list = FindMatchForRowEx (match_type, match_choice->data.ptrvalue, entityID, sep, index);
37567       if (match_list == NULL) {
37568         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15));
37569         sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num);
37570         ValNodeAddPointerToEnd (&vnb, 0, err_msg);
37571         num_missing ++;
37572       } else {
37573         for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
37574              col_vnp != NULL;
37575              col_vnp = col_vnp->next, col_num++) {
37576           target_list = NULL;
37577           t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
37578           if (t == NULL || t->match_type != NULL
37579               || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
37580             /* no targets */
37581           } else {
37582             target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint);
37583             if (target_list == NULL) {
37584               ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num);
37585               num_no_targets++;
37586             }
37587           }
37588           ValNodeAddPointer (&obj_row, 0, target_list);
37589           if (val_vnp != NULL) {
37590             val_vnp = val_vnp->next;
37591           }
37592         }
37593       }
37594       match_list = ValNodeFree (match_list);
37595     }
37596     tmp = ValNodeAddPointer (&last_obj, 0, obj_row);
37597     if (obj_table == NULL) {
37598       obj_table = last_obj;
37599     }
37600     last_obj = tmp;
37601   }
37602 
37603   match_type = MatchTypeFree (match_type);
37604   index = BioseqSearchIndexFree (index);
37605 
37606   if (vnb.head != NULL) {
37607     if (num_empty > 0) {
37608       err_vnp = ReportTableSummaryLine (num_empty, line_num - 1, "%d lines out of %d have no ID value");
37609       err_vnp->next = vnb.head;
37610       vnb.head = err_vnp;
37611     }
37612     if (num_no_targets > 0) {
37613       err_vnp = ReportTableSummaryLine (num_no_targets, line_num - 1, "%d lines out of %d have no targets");
37614       err_vnp->next = vnb.head;
37615       vnb.head = err_vnp;
37616     }
37617     if (num_missing > 0) {
37618       err_vnp = ReportTableSummaryLine (num_missing, line_num - 1, "%d lines out of %d have no match");
37619       err_vnp->next = vnb.head;
37620       vnb.head = err_vnp;
37621     }
37622     if (p_err_list == NULL) {
37623       vnb.head = ValNodeFreeData (vnb.head);
37624     } else {
37625       *p_err_list = vnb.head;
37626     }
37627   }
37628   return obj_table;
37629 }
37630 
37631 
FreeObjectTableForTabTable(ValNodePtr table)37632 NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table)
37633 {
37634   ValNodePtr vnp_next, vnp_row, vnp_row_next;
37635 
37636   while (table != NULL) {
37637     vnp_next = table->next;
37638     table->next = NULL;
37639     vnp_row = table->data.ptrvalue;
37640     while (vnp_row != NULL) {
37641       vnp_row_next = vnp_row->next;
37642       vnp_row->next = NULL;
37643       vnp_row->data.ptrvalue = ValNodeFree (vnp_row->data.ptrvalue);
37644       vnp_row = ValNodeFree (vnp_row);
37645       vnp_row = vnp_row_next;
37646     }
37647     table = ValNodeFree (table);
37648     table = vnp_next;
37649   }
37650   return table;
37651 }
37652 
37653 
/* Accumulator passed as userdata when counting features of a single type. */
typedef struct countfeat {
  Uint1 featdef;  /* feature subtype to look for (compared with sfp->idx.subtype) */
  Int4 num;       /* running count of matching features */
} CountFeatData, PNTR CountFeatPtr;
37658 
37659 
/* Feature visitor: userdata is a CountFeatPtr.  Increments its num member
 * when the feature's subtype equals its featdef member.  Does nothing when
 * either argument is NULL.
 */
static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata)
{
  CountFeatPtr counter = (CountFeatPtr) userdata;

  if (sfp != NULL && counter != NULL && sfp->idx.subtype == counter->featdef) {
    counter->num++;
  }
}
37671 
/* Descriptor visitor: userdata is a pointer to an Int4 counter, which is
 * incremented for each descriptor whose choice is Seq_descr_source.
 * Does nothing when either argument is NULL.
 */
static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata)
{
  Int4Ptr count = (Int4Ptr) userdata;

  if (sdp == NULL || count == NULL) {
    return;
  }
  if (sdp->choice == Seq_descr_source) {
    *count += 1;
  }
}
37681 
37682 
/* Descriptor visitor: userdata is a pointer to an Int4 counter, which is
 * incremented for each descriptor whose choice is Seq_descr_pub.
 * Does nothing when either argument is NULL.
 */
static void CountPubDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata)
{
  Int4Ptr count = (Int4Ptr) userdata;

  if (sdp == NULL || count == NULL) {
    return;
  }
  if (sdp->choice == Seq_descr_pub) {
    *count += 1;
  }
}
37692 
37693 
/* For each column configuration, counts how many objects in sep the column's
 * field could apply to.  Returns a list of integer nodes, one per entry of
 * columns; the count is 0 for match columns, for columns without a field,
 * and for field types not handled below.
 *
 * Feature counts are cached in d between columns so that consecutive columns
 * for the same feature type do not rescan the record.  Source-qualifier and
 * publication columns count both features and descriptors, so the cache also
 * records (d_has_descr) whether d.num includes descriptors; a cached value is
 * reused only when both the feature type and that flag agree with what the
 * current column needs.
 */
static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns)
{
  ValNodePtr count_list = NULL, vnp;
  TabColumnConfigPtr t;
  CountFeatData d;
  FeatureFieldPtr f;
  Int4 num;
  Uint1 featdef = 0;
  Boolean d_has_descr = FALSE;  /* TRUE when d.num also includes descriptor counts */
  ValNodePtr tmp_list = NULL;

  d.featdef = 0;
  d.num = 0;
  for (vnp = columns; vnp != NULL; vnp = vnp->next) {
    num = 0;
    t = (TabColumnConfigPtr) vnp->data.ptrvalue;
    if (t != NULL && t->match_type == NULL && t->field != NULL) {
      switch (t->field->choice) {
        case FieldType_source_qual:
          /* biosource features plus biosource descriptors */
          if (d.featdef != FEATDEF_BIOSRC || !d_has_descr) {
            d.featdef = FEATDEF_BIOSRC;
            d.num = 0;
            VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
            VisitDescriptorsInSep (sep, &(d.num), CountBioSourceDescriptorsCallback);
            d_has_descr = TRUE;
          }
          num = d.num;
          break;
        case FieldType_feature_field:
          /* f is borrowed from the column configuration; do not free */
          f = (FeatureFieldPtr) t->field->data.ptrvalue;
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType(f->type);
            if (featdef != d.featdef || d_has_descr) {
              d.featdef = featdef;
              d.num = 0;
              VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
              d_has_descr = FALSE;
            }
            num = d.num;
          }
          break;
        case FieldType_cds_gene_prot:
          f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue);
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType(f->type);
            if (featdef != d.featdef || d_has_descr) {
              d.featdef = featdef;
              d.num = 0;
              VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
              d_has_descr = FALSE;
            }
            num = d.num;
          }
          f = FeatureFieldFree (f);
          break;
        case FieldType_rna_field:
          f = FeatureFieldFromRnaQual (t->field->data.ptrvalue);
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType(f->type);
            if (featdef != d.featdef || d_has_descr) {
              d.featdef = featdef;
              d.num = 0;
              VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
              d_has_descr = FALSE;
            }
            num = d.num;
          }
          f = FeatureFieldFree (f);
          break;
        case FieldType_pub:
          /* publication features plus publication descriptors */
          if (d.featdef != FEATDEF_PUB || !d_has_descr) {
            d.featdef = FEATDEF_PUB;
            d.num = 0;
            VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
            VisitDescriptorsInSep (sep, &(d.num), CountPubDescriptorsCallback);
            d_has_descr = TRUE;
          }
          num = d.num;
          break;
        case FieldType_struc_comment_field:
          VisitDescriptorsInSep (sep, &tmp_list, CollectStructuredCommentsCallback);
          num = ValNodeLen (tmp_list);
          tmp_list = ValNodeFree (tmp_list);
          break;
        case FieldType_dblink:
          tmp_list = CollectDBLinkDescriptors (sep);
          num = ValNodeLen (tmp_list);
          tmp_list = ValNodeFree (tmp_list);
          break;
        case FieldType_misc:
          if (t->field->data.intvalue == Misc_field_genome_project_id) {
            tmp_list = CollectNucBioseqs (sep);
            num = ValNodeLen (tmp_list);
            tmp_list = ValNodeFree (tmp_list);
          } else if (t->field->data.intvalue == Misc_field_comment_descriptor) {
            tmp_list = CollectCommentDescriptors (sep);
            num = ValNodeLen (tmp_list);
            tmp_list = ValNodeFree (tmp_list);
          } else if (t->field->data.intvalue == Misc_field_defline) {
            tmp_list = CollectDeflineDescriptors (sep);
            num = ValNodeLen (tmp_list);
            tmp_list = ValNodeFree (tmp_list);
          } else if (t->field->data.intvalue == Misc_field_keyword) {
            tmp_list = CollectGenbankBlockDescriptors (sep);
            num = ValNodeLen (tmp_list);
            tmp_list = ValNodeFree (tmp_list);
          }
          break;
        case FieldType_molinfo_field:
          VisitBioseqsInSep (sep, &tmp_list, CollectBioseqCallback);
          num = ValNodeLen (tmp_list);
          tmp_list = ValNodeFree (tmp_list);
          break;
        default:
          /* other field types are not counted; num stays 0 */
          break;
      }
    }
    ValNodeAddInt (&count_list, 0, num);
  }
  return count_list;
}
37806 
37807 
ApplyTableValuesToObjectTable(SeqEntryPtr sep,ValNodePtr table,ValNodePtr columns,ValNodePtr obj_table)37808 NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
37809 {
37810   ValNodePtr val_line_vnp, obj_line_vnp;
37811   ValNodePtr val_vnp, obj_vnp, col_vnp;
37812   ValNodePtr target_vnp, tmp_field;
37813   TabColumnConfigPtr t;
37814   CharPtr val, qual_name;
37815   ValNodePtr         err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp, sq;
37816   CharPtr            err_msg;
37817   CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
37818   CharPtr            num_affected_fmt = "%d fields affected";
37819   CharPtr            col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total";
37820   Int4 num_fields_affected = 0, col_num, line_num, num_this_column;
37821   Boolean success;
37822   ValNodePtr count_msg = NULL;
37823   MatchTypePtr match_type;
37824 
37825   count_list = CountObjectsForColumnFields (sep, columns);
37826   match_type = FindMatchTypeInHeader (columns);
37827   if (match_type->choice == eTableMatchAny && table->next != NULL) {
37828     /* skip first row, must contain header */
37829     table = table->next;
37830   }
37831 
37832   for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
37833        val_line_vnp != NULL && obj_line_vnp != NULL;
37834        val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
37835     val_vnp = val_line_vnp->data.ptrvalue;
37836     obj_vnp = obj_line_vnp->data.ptrvalue;
37837     col_vnp = columns;
37838     col_num = 1;
37839     count_vnp = count_affected_list;
37840     while (obj_vnp != NULL && col_vnp != NULL) {
37841       num_this_column = 0;
37842       if (obj_vnp->data.ptrvalue != NULL) {
37843         t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
37844         if (t == NULL || t->match_type != NULL
37845             || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
37846           /* ignore column or skip blank value */
37847         } else {
37848           if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
37849             val = "";
37850           } else {
37851             val = val_vnp->data.ptrvalue;
37852           }
37853           for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) {
37854             if (val[0] == 0) {
37855               success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL);
37856             } else {
37857               if (t->field != NULL && t->field->choice == FieldType_molinfo_field) {
37858                 /* adjust molinfo fields */
37859                 success = FALSE;
37860                 if (target_vnp->choice == OBJ_BIOSEQ) {
37861                   tmp_field = MolinfoFieldFromFieldAndStringValue (t->field->data.ptrvalue, val_vnp->data.ptrvalue);
37862                   if (tmp_field != NULL) {
37863                     success = SetSequenceQualOnBioseq ((BioseqPtr) target_vnp->data.ptrvalue, tmp_field);
37864                     tmp_field = MolinfoFieldFree(tmp_field);
37865                   }
37866                 }
37867               } else if (t->field != NULL && t->field->choice == FieldType_source_qual
37868                          && (sq = (ValNodePtr)(t->field->data.ptrvalue)) != NULL
37869                          && sq->choice == SourceQualValChoice_location) {
37870                 /* adjust for source location */
37871                 success = FALSE;
37872                 tmp_field = SrcLocationFieldFromValue(val_vnp->data.ptrvalue);
37873                 if (tmp_field != NULL) {
37874                   success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, tmp_field, NULL,
37875                                                   val_vnp->data.ptrvalue, t->existing_text);
37876                   tmp_field = FieldTypeFree (tmp_field);
37877                 }
37878               } else {
37879                 success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL,
37880                                                   val_vnp->data.ptrvalue, t->existing_text);
37881               }
37882             }
37883             if (success) {
37884               num_fields_affected++;
37885               num_this_column++;
37886               if (t->match_mrna && IsFieldTypeCDSProduct (t->field)
37887                   && target_vnp->choice == OBJ_SEQFEAT) {
37888                 if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) {
37889                   num_fields_affected++;
37890                 }
37891               }
37892             } else {
37893               err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
37894               sprintf (err_msg, bad_col_val_fmt, col_num, line_num);
37895               ValNodeAddPointer (&err_list, 0, err_msg);
37896             }
37897           }
37898         }
37899       }
37900       if (val_vnp != NULL) {
37901         val_vnp = val_vnp->next;
37902       }
37903       if (count_vnp == NULL) {
37904         ValNodeAddInt (&count_affected_list, 0, num_this_column);
37905       } else {
37906         count_vnp->data.intvalue += num_this_column;
37907         count_vnp = count_vnp->next;
37908       }
37909       obj_vnp = obj_vnp->next;
37910       col_vnp = col_vnp->next;
37911       col_num++;
37912     }
37913   }
37914 
37915   /* put message at top of list for number of fields affected */
37916   err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
37917   sprintf (err_msg, num_affected_fmt, num_fields_affected);
37918   ValNodeAddPointer (&count_msg, 0, err_msg);
37919 
37920   /* if any affected, list number of fields per column, and the total in the record */
37921   if (num_fields_affected > 0) {
37922     for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1;
37923          count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL;
37924          count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) {
37925       t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
37926       if (t != NULL && t->match_type == NULL) {
37927         qual_name = SummarizeFieldType (t->field);
37928         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45));
37929         sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue);
37930         ValNodeAddPointer (&count_msg, 0, err_msg);
37931         qual_name = MemFree (qual_name);
37932       }
37933     }
37934   }
37935 
37936   ValNodeLink (&count_msg, err_list);
37937 
37938   count_list = ValNodeFree (count_list);
37939   count_affected_list = ValNodeFree (count_affected_list);
37940 
37941   return count_msg;
37942 }
37943 
37944 
/* Returns a list of integer nodes holding the zero-based indexes of the
 * rows of obj_table whose cell at the given zero-based column contains a
 * target with the given choice and data pointer.  Each row is listed at
 * most once.  Returns NULL when obj_table is NULL, column is negative, or
 * nothing matches.
 */
static ValNodePtr FindRowsForObjectInObjectTable (ValNodePtr obj_table, Int4 column, Uint1 choice, Pointer data)
{
  ValNodePtr rows_found = NULL;
  ValNodePtr row, cell, target;
  Int4 row_idx, skipped;

  if (obj_table == NULL || column < 0) {
    return NULL;
  }

  for (row = obj_table, row_idx = 0; row != NULL; row = row->next, row_idx++) {
    /* advance to the requested column; rows may be shorter than that */
    cell = row->data.ptrvalue;
    for (skipped = 0; skipped < column && cell != NULL; skipped++) {
      cell = cell->next;
    }
    if (cell == NULL) {
      continue;
    }

    for (target = cell->data.ptrvalue; target != NULL; target = target->next) {
      if (target->choice == choice && target->data.ptrvalue == data) {
        ValNodeAddInt (&rows_found, 0, row_idx);
        break;
      }
    }
  }
  return rows_found;
}
37974 
37975 
FormatMultipleDestinationErrorMessage(Int4 col_num,ValNodePtr match_rows)37976 static CharPtr FormatMultipleDestinationErrorMessage (Int4 col_num, ValNodePtr match_rows)
37977 {
37978   CharPtr multi_fmt = "Multiple rows apply to the same object for column %d.  Matching rows:";
37979   CharPtr err_msg;
37980   Char    buf[16];
37981   ValNodePtr vnp;
37982 
37983   err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt)
37984                                                 + 30 + 15 * ValNodeLen (match_rows)));
37985   sprintf (err_msg, multi_fmt, col_num);
37986   for (vnp = match_rows; vnp != NULL; vnp = vnp->next) {
37987     sprintf (buf, "%d", vnp->data.intvalue + 1);
37988     StringCat (err_msg, buf);
37989     if (vnp->next != NULL) {
37990       StringCat (err_msg, ",");
37991     }
37992   }
37993   return err_msg;
37994 }
37995 
37996 
CheckObjTableForRowsThatApplyToTheSameDestination(ValNodePtr obj_table)37997 NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table)
37998 {
37999   Int4 col_num;
38000   ValNodeBlock vnb, err_list;
38001   ValNodePtr line_vnp, col_vnp, obj_vnp, vnp;
38002   ValNodePtr col_obj_list;
38003   Boolean any_column_values_left;
38004   ValNodePtr match_rows;
38005 
38006   vnb.head = NULL;
38007   vnb.tail = NULL;
38008   err_list.head = NULL;
38009   err_list.tail = NULL;
38010 
38011   /* now, for each row, get pointer to first column */
38012   for (line_vnp = obj_table; line_vnp != NULL; line_vnp = line_vnp->next) {
38013     if (line_vnp->data.ptrvalue != NULL) {
38014       ValNodeAddPointerToEnd (&vnb, 0, line_vnp->data.ptrvalue);
38015     }
38016   }
38017 
38018   /* now for each column, make a list of all features in the column, then sort to see if there are duplicates */
38019   any_column_values_left = TRUE;
38020   col_num = 1;
38021   while (any_column_values_left) {
38022     any_column_values_left = FALSE;
38023     col_obj_list = NULL;
38024     for (vnp = vnb.head; vnp != NULL; vnp = vnp->next) {
38025       col_vnp = vnp->data.ptrvalue;
38026       if (col_vnp != NULL) {
38027         obj_vnp = col_vnp->data.ptrvalue;
38028         ValNodeLink (&col_obj_list, ValNodeCopyPtr (obj_vnp));
38029         vnp->data.ptrvalue = col_vnp->next;
38030         any_column_values_left = TRUE;
38031       }
38032     }
38033     if (col_obj_list != NULL) {
38034       col_obj_list = ValNodeSort (col_obj_list, SortVnpByChoiceAndPtrvalue);
38035       for (vnp = col_obj_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next) {
38036         if (vnp->choice == vnp->next->choice
38037             && vnp->data.ptrvalue == vnp->next->data.ptrvalue) {
38038           match_rows = FindRowsForObjectInObjectTable (obj_table, col_num - 1, vnp->choice, vnp->data.ptrvalue);
38039           /* report rows with matches */
38040           ValNodeAddPointerToEnd (&err_list, col_num, FormatMultipleDestinationErrorMessage (col_num, match_rows));
38041           match_rows = ValNodeFree (match_rows);
38042           /* skip over the cluster of matches */
38043           while (vnp->next != NULL && vnp->choice == vnp->next->choice) {
38044             vnp = vnp->next;
38045           }
38046         }
38047       }
38048       col_obj_list = ValNodeFree (col_obj_list);
38049     }
38050     col_num++;
38051   }
38052   vnb.head = ValNodeFree (vnb.head);
38053   return err_list.head;
38054 }
38055 
38056 
GetMatchTextForLine(ValNodePtr values,ValNodePtr columns)38057 static CharPtr GetMatchTextForLine (ValNodePtr values, ValNodePtr columns)
38058 {
38059   ValNodePtr val_vnp, col_vnp;
38060   CharPtr    match_txt = NULL;
38061   TabColumnConfigPtr t;
38062 
38063   for (val_vnp = values, col_vnp = columns;
38064        val_vnp != NULL && col_vnp != NULL;
38065        val_vnp = val_vnp->next, col_vnp = col_vnp->next) {
38066     t = col_vnp->data.ptrvalue;
38067     if (t != NULL && t->match_type != NULL) {
38068       match_txt = val_vnp->data.ptrvalue;
38069       break;
38070     }
38071   }
38072   return match_txt;
38073 }
38074 
38075 
38076 /* Note - when creating error messages, mark summary messages with choice = 1 */
CheckObjTableForExistingText(SeqEntryPtr sep,ValNodePtr table,ValNodePtr columns,ValNodePtr obj_table)38077 NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
38078 {
38079   ValNodeBlock vnb;
38080   ValNodePtr val_line_vnp, obj_line_vnp;
38081   ValNodePtr val_vnp, obj_vnp, col_vnp;
38082   ValNodePtr col_tot = NULL, col_tot_vnp;
38083   Int4       line_num = 1, col_num, num_existing_text = 0;
38084   Uint2      entityID;
38085   TabColumnConfigPtr t;
38086   CharPtr            err_msg, str, qual_name, val;
38087   CharPtr            already_has_val_fmt = "%s\t%s\t%s\t%d\t%s\t%d";
38088   CharPtr            num_existing_text_fmt = "%d fields already have text.\nID\tOld Value\tReplacement\tColumn\tQualifier\tLine";
38089   CharPtr            mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not.";
38090   CharPtr            col_tot_fmt = "For column %d, %d out of %d fields already have text.";
38091   ValNodePtr         target_list, feat_vnp;
38092   Int4               num_with_mrna = 0, num_without_mrna = 0;
38093   CharPtr            match_txt;
38094   CharPtr            new_val;
38095   MatchTypePtr       match_type;
38096 
38097   vnb.head = NULL;
38098   vnb.tail = NULL;
38099 
38100   if (sep == NULL) {
38101     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry"));
38102   }
38103   if (table == NULL) {
38104     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table"));
38105   }
38106   if (columns == NULL) {
38107     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information"));
38108   }
38109 
38110   match_type = FindMatchTypeInHeader (columns);
38111   if (match_type == NULL) {
38112     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("Must have match type"));
38113   } else if (table != NULL && match_type->choice == eTableMatchAny && table->next != NULL) {
38114     if (table->next->next != NULL) {
38115       ValNodeAddPointerToEnd (&vnb, 1, StringSave ("Table has too many rows for apply to all"));
38116     } else {
38117       /* skip first row, must contain header */
38118       table = table->next;
38119     }
38120   }
38121 
38122 
38123   if (vnb.head != NULL) {
38124     return vnb.head;
38125   }
38126 
38127   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
38128 
38129   for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
38130        val_line_vnp != NULL && obj_line_vnp != NULL;
38131        val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
38132     val_vnp = val_line_vnp->data.ptrvalue;
38133     obj_vnp = obj_line_vnp->data.ptrvalue;
38134     col_vnp = columns;
38135     if (val_vnp == NULL || obj_vnp == NULL) continue;
38136     col_num = 1;
38137     col_tot_vnp = col_tot;
38138     if (col_tot_vnp == NULL) {
38139       col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0);
38140     }
38141     while (obj_vnp != NULL && col_vnp != NULL) {
38142       if (obj_vnp->data.ptrvalue != NULL) {
38143         t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
38144         if (t == NULL || t->match_type != NULL
38145             || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
38146           /* ignore column or skip blank value */
38147         } else {
38148           target_list = obj_vnp->data.ptrvalue;
38149           if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
38150             val = "";
38151           } else {
38152             val = val_vnp->data.ptrvalue;
38153           }
38154           for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
38155             /* check for existing text */
38156             str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
38157             if (!StringHasNoText (str)) {
38158               qual_name = SummarizeFieldType (t->field);
38159               match_txt = GetMatchTextForLine (val_line_vnp->data.ptrvalue, columns);
38160               if (match_txt == NULL) {
38161                 match_txt = "";
38162               }
38163               new_val = StringSave (str);
38164               SetStringValue (&new_val, val, t->existing_text);
38165               err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
38166                                                            + StringLen (match_txt)
38167                                                            + StringLen (str)
38168                                                            + StringLen (new_val)
38169                                                            + StringLen (qual_name)
38170                                                            + 30));
38171               sprintf (err_msg, already_has_val_fmt, match_txt, str, new_val, col_num, qual_name, line_num);
38172               ValNodeAddPointerToEnd (&vnb, 0, err_msg);
38173               num_existing_text ++;
38174               new_val = MemFree (new_val);
38175               col_tot_vnp->data.intvalue ++;
38176             }
38177             str = MemFree (str);
38178             /* check for mrna if changing CDS product */
38179             if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) {
38180               if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) {
38181                 num_with_mrna++;
38182               } else {
38183                 num_without_mrna++;
38184               }
38185             }
38186           }
38187         }
38188       }
38189       if (val_vnp != NULL) {
38190         val_vnp = val_vnp->next;
38191       }
38192       obj_vnp = obj_vnp->next;
38193       col_vnp = col_vnp->next;
38194       col_num++;
38195       col_tot_vnp = col_tot_vnp->next;
38196       if (col_tot_vnp == NULL) {
38197         col_tot_vnp = ValNodeAddInt (&col_tot, 0, 0);
38198       }
38199     }
38200   }
38201   if (num_existing_text > 0) {
38202     for (col_tot_vnp = col_tot, col_num = 1; col_tot_vnp != NULL; col_tot_vnp = col_tot_vnp->next, col_num++) {
38203       if (col_tot_vnp->data.intvalue > 0) {
38204         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_tot_fmt) + 45));
38205         sprintf (err_msg, col_tot_fmt, col_num, col_tot_vnp->data.intvalue, line_num - 1);
38206         ValNodeAddPointerToEnd (&vnb, 1, err_msg);
38207       }
38208     }
38209 
38210     err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt)
38211                                                 + 15));
38212     sprintf (err_msg, num_existing_text_fmt, num_existing_text);
38213     ValNodeAddPointerToFront (&vnb, 0, err_msg);
38214   }
38215   col_tot = ValNodeFree (col_tot);
38216   if (num_with_mrna > 0 && num_without_mrna > 0) {
38217     err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt)
38218                                                 + 30));
38219     sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna);
38220     ValNodeAddPointerToFront (&vnb, 1, err_msg);
38221   }
38222 
38223   return vnb.head;
38224 }
38225 
38226 
ApplyTableToFeatures(SeqEntryPtr sep,ValNodePtr table,ValNodePtr columns)38227 NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
38228 {
38229   ValNodeBlock vnb;
38230   ValNodePtr line_vnp, val_vnp, col_vnp;
38231   Int4       line_num = 1, col_num;
38232   Uint2      entityID;
38233   ValNodePtr match_list, match_choice, target_list, feat_vnp;
38234   TabColumnConfigPtr t;
38235   CharPtr            err_msg;
38236   CharPtr            no_match_fmt = "No match for %s, line %d";
38237   CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
38238   CharPtr            num_affected_fmt = "%d fields affected";
38239   Int4               num_fields_affected = 0;
38240   CharPtr            val;
38241   Boolean            success;
38242   MatchTypePtr       match_type;
38243 
38244   vnb.head = NULL;
38245   vnb.tail = NULL;
38246 
38247   if (sep == NULL) {
38248     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No SeqEntry"));
38249   }
38250   if (table == NULL) {
38251     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No table"));
38252   }
38253   if (columns == NULL) {
38254     ValNodeAddPointerToEnd (&vnb, 0, StringSave ("No column information"));
38255   }
38256   if (vnb.head != NULL) {
38257     return vnb.head;
38258   }
38259 
38260   match_type = FindMatchTypeInHeader (columns);
38261 
38262   if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
38263     /* skip first row, must contain header */
38264     table = table->next;
38265   }
38266 
38267   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
38268 
38269   for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) {
38270     match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns);
38271     if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) {
38272       ReportEmptyIDColumn (&vnb, line_num);
38273     } else {
38274       match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep);
38275       if (match_list == NULL) {
38276         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15));
38277         sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num);
38278         ValNodeAddPointerToEnd (&vnb, 0, err_msg);
38279       } else {
38280         for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
38281              col_vnp != NULL;
38282              col_vnp = col_vnp->next, col_num++) {
38283           t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
38284           if (t == NULL || t->match_type != NULL
38285               || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
38286             if (val_vnp != NULL) {
38287               val_vnp = val_vnp->next;
38288             }
38289             continue;
38290           }
38291 
38292           target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint);
38293           if (target_list == NULL) {
38294             ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num);
38295           } else {
38296             if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
38297               val = "";
38298             } else {
38299               val = val_vnp->data.ptrvalue;
38300             }
38301             for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
38302               if (val[0] == 0) {
38303                 success = RemoveFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
38304               } else {
38305                 success = SetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL,
38306                                                   val_vnp->data.ptrvalue, t->existing_text);
38307               }
38308               if (success) {
38309                 num_fields_affected++;
38310                 if (t->match_mrna && IsFieldTypeCDSProduct (t->field)
38311                     && feat_vnp->choice == OBJ_SEQFEAT) {
38312                   if (AdjustmRNAProductToMatchProteinProduct (feat_vnp->data.ptrvalue)) {
38313                     num_fields_affected++;
38314                   }
38315                 }
38316               } else {
38317                 err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
38318                 sprintf (err_msg, bad_col_val_fmt, col_num, line_num);
38319                 ValNodeAddPointerToEnd (&vnb, 0, err_msg);
38320               }
38321             }
38322           }
38323           target_list = ValNodeFree (target_list);
38324           if (val_vnp != NULL) {
38325             val_vnp = val_vnp->next;
38326           }
38327         }
38328       }
38329       match_list = ValNodeFree (match_list);
38330     }
38331   }
38332 
38333   err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
38334   sprintf (err_msg, num_affected_fmt, num_fields_affected);
38335   ValNodeAddPointerToFront (&vnb, 0, err_msg);
38336   match_type = MatchTypeFree (match_type);
38337 
38338   return vnb.head;
38339 }
38340 
CheckTableForExistingText(SeqEntryPtr sep,ValNodePtr table,ValNodePtr columns)38341 NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
38342 {
38343   ValNodeBlock vnb;
38344   ValNodePtr line_vnp, val_vnp, col_vnp;
38345   Int4       line_num = 1, col_num, num_existing_text = 0;
38346   Uint2      entityID;
38347   TabColumnConfigPtr t;
38348   CharPtr            err_msg, str, qual_name, val;
38349   CharPtr            no_match_fmt = "No match for %s, line %d";
38350   CharPtr            already_has_val_fmt = "%s already has value '%s' (column %d), line %d.  Replacement is '%s'";
38351   CharPtr            num_existing_text_fmt = "%d fields already have text.";
38352   ValNodePtr         match_choice, match_list;
38353   ValNodePtr         target_list, feat_vnp;
38354   MatchTypePtr       match_type;
38355 
38356   vnb.head = NULL;
38357   vnb.tail = NULL;
38358   if (sep == NULL) {
38359     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No SeqEntry"));
38360   }
38361   if (table == NULL) {
38362     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No table"));
38363   }
38364   if (columns == NULL) {
38365     ValNodeAddPointerToEnd (&vnb, 1, StringSave ("No column information"));
38366   }
38367   if (vnb.head != NULL) {
38368     return vnb.head;
38369   }
38370 
38371   match_type = FindMatchTypeInHeader (columns);
38372   if (match_type == NULL) return NULL;
38373 
38374   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
38375 
38376   if (match_type->choice == eTableMatchAny && table != NULL && table->next != NULL) {
38377     /* skip first row, must contain header */
38378     table = table->next;
38379   }
38380 
38381   for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) {
38382     match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns);
38383     if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) {
38384       ReportEmptyIDColumn (&vnb, line_num);
38385       if (vnb.head == NULL) {
38386         vnb.head = vnb.tail;
38387       }
38388     } else {
38389       match_list = FindMatchForRow (match_type, match_choice->data.ptrvalue, entityID, sep);
38390       if (match_list == NULL) {
38391         err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15));
38392         sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num);
38393         ValNodeAddPointerToEnd (&vnb, 0, err_msg);
38394       } else {
38395         for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
38396              col_vnp != NULL;
38397              col_vnp = col_vnp->next, col_num++) {
38398           t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
38399           if (t == NULL || t->match_type != NULL
38400               || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
38401             if (val_vnp != NULL) {
38402               val_vnp = val_vnp->next;
38403             }
38404             continue;
38405           }
38406           target_list = GetTargetListForRowAndColumn (match_type, match_list, t->field, t->constraint);
38407           if (target_list == NULL) {
38408             ReportMissingTargets (&vnb, t->field, match_choice->data.ptrvalue, col_num, line_num);
38409           } else {
38410             if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
38411               val = "";
38412             } else {
38413               val = val_vnp->data.ptrvalue;
38414             }
38415             for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
38416               str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
38417               if (!StringHasNoText (str)) {
38418                 qual_name = SummarizeFieldType (t->field);
38419                 err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
38420                                                             + StringLen (qual_name) + StringLen (str)
38421                                                             + StringLen (val)
38422                                                             + 30));
38423                 sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val);
38424                 ValNodeAddPointerToEnd (&vnb, col_num, err_msg);
38425                 num_existing_text ++;
38426               }
38427               str = MemFree (str);
38428             }
38429           }
38430           target_list = ValNodeFree (target_list);
38431           if (val_vnp != NULL) {
38432             val_vnp = val_vnp->next;
38433           }
38434         }
38435       }
38436       match_list = ValNodeFree (match_list);
38437     }
38438   }
38439   if (num_existing_text > 0) {
38440     err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt)
38441                                                 + 15));
38442     sprintf (err_msg, num_existing_text_fmt, num_existing_text);
38443     ValNodeAddPointerToFront (&vnb, 0, err_msg);
38444   }
38445 
38446   return vnb.head;
38447 }
38448 
38449 
38450 /* Reporting functions for SMART */
/* Descriptor visitor: for a publication descriptor, fetches its title field
 * and, when one is returned, appends that pointer (not a copy) to the
 * ValNode list whose address is passed in userdata.
 */
static void GetDescriptorPubTitles (SeqDescrPtr sdp, Pointer userdata)
{
  ValNodePtr PNTR title_list = (ValNodePtr PNTR) userdata;
  CharPtr         pub_title;

  if (sdp != NULL && sdp->choice == Seq_descr_pub && title_list != NULL) {
    pub_title = GetPubFieldFromObject (OBJ_SEQDESC, sdp, Publication_field_title, NULL);
    if (pub_title != NULL) {
      ValNodeAddPointer (title_list, 0, pub_title);
    }
  }
}
38464 
38465 
/* Feature visitor: for a publication feature, fetches its title field and,
 * when one is returned, appends that pointer (not a copy) to the ValNode
 * list whose address is passed in userdata.
 */
static void GetFeaturePubTitles (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR title_list = (ValNodePtr PNTR) userdata;
  CharPtr         pub_title;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB && title_list != NULL) {
    pub_title = GetPubFieldFromObject (OBJ_SEQFEAT, sfp, Publication_field_title, NULL);
    if (pub_title != NULL) {
      ValNodeAddPointer (title_list, 0, pub_title);
    }
  }
}
38479 
38480 
GetPublicationTitlesInSep(SeqEntryPtr sep)38481 NLM_EXTERN ValNodePtr GetPublicationTitlesInSep (SeqEntryPtr sep)
38482 {
38483   ValNodePtr title_list = NULL;
38484 
38485   VisitDescriptorsInSep (sep, &title_list, GetDescriptorPubTitles);
38486   VisitFeaturesInSep (sep, &title_list, GetFeaturePubTitles);
38487   return title_list;
38488 }
38489 
38490 
GetPublicationTitlesOnSep(SeqEntryPtr sep)38491 NLM_EXTERN ValNodePtr GetPublicationTitlesOnSep (SeqEntryPtr sep)
38492 {
38493   ValNodePtr title_list = NULL;
38494 
38495   VisitDescriptorsOnSep (sep, &title_list, GetDescriptorPubTitles);
38496   VisitFeaturesOnSep (sep, &title_list, GetFeaturePubTitles);
38497   return title_list;
38498 }
38499 
38500 
/* Descriptor visitor that collects Bankit submission comments.
 *
 * Only user-object descriptors whose class is "SMART_V1.0" and whose type
 * string is "Submission" are examined.  For each of their fields labelled
 * "AdditionalComment" that contains text, a copy of the text is appended to
 * the ValNode list whose address is passed in userdata.
 */
static void GetBankitCommentsCallback (SeqDescrPtr sdp, Pointer userdata)
{
  UserObjectPtr uop;
  ObjectIdPtr   oip;
  UserFieldPtr  ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || userdata == NULL) {
    return;
  }

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  /* the class must MATCH "SMART_V1.0"; the previous "!= 0" test skipped
   * exactly the objects this SMART reporting function is meant to read
   */
  if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") == 0) {
    oip = uop->type;
    if (oip != NULL && StringCmp (oip->str, "Submission") == 0) {
      for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
        oip = ufp->label;
        if (oip != NULL
            && StringCmp (oip->str, "AdditionalComment") == 0
            && !StringHasNoText (ufp->data.ptrvalue)) {
          ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, StringSave (ufp->data.ptrvalue));
        }
      }
    }
  }
}
38526 
38527 
GetBankitCommentsInSep(SeqEntryPtr sep)38528 NLM_EXTERN ValNodePtr GetBankitCommentsInSep (SeqEntryPtr sep)
38529 {
38530   ValNodePtr comment_list = NULL;
38531 
38532   VisitDescriptorsInSep (sep, &comment_list, GetBankitCommentsCallback);
38533   return comment_list;
38534 }
38535 
38536 
GetBankitCommentsOnSep(SeqEntryPtr sep)38537 NLM_EXTERN ValNodePtr GetBankitCommentsOnSep (SeqEntryPtr sep)
38538 {
38539   ValNodePtr comment_list = NULL;
38540 
38541   VisitDescriptorsOnSep (sep, &comment_list, GetBankitCommentsCallback);
38542   return comment_list;
38543 }
38544 
38545 
/* BioSource visitor: splits reactions that hold several primers so that
 * primers at the same position in the forward and reverse lists end up in
 * their own reaction.
 *
 * While a reaction has at least two forward AND at least two reverse
 * primers, everything after the first primer of each list is moved into a
 * new reaction inserted directly after the current one, and the new reaction
 * is then examined in turn.  The data argument is not used.
 */
static void SplitPCRPrimersByPositionCallback (BioSourcePtr biop, Pointer data)
{
  PCRReactionPtr reaction, following, split_off;
  PCRPrimerPtr   fwd, rev;

  if (biop == NULL || biop->pcr_primers == NULL) {
    return;
  }

  reaction = biop->pcr_primers;
  while (reaction != NULL) {
    /* remember the original successor; reactions created below are
     * inserted in front of it
     */
    following = reaction->next;

    for (;;) {
      fwd = reaction->forward;
      rev = reaction->reverse;
      if (fwd == NULL || rev == NULL || fwd->next == NULL || rev->next == NULL) {
        break;
      }
      split_off = PCRReactionNew ();
      split_off->forward = fwd->next;
      split_off->reverse = rev->next;
      fwd->next = NULL;
      rev->next = NULL;
      reaction->next = split_off;
      split_off->next = following;
      reaction = split_off;
    }

    reaction = following;
  }
}
38574 
38575 
SplitPCRPrimersByPosition(SeqEntryPtr sep)38576 NLM_EXTERN void SplitPCRPrimersByPosition (SeqEntryPtr sep)
38577 {
38578   VisitBioSourcesInSep (sep, NULL, SplitPCRPrimersByPositionCallback);
38579 }
38580 
38581 
/* BioSource visitor: merges all PCR reactions of biop into the first one.
 *
 * The forward primers of every later reaction are appended, in order, to the
 * first reaction's forward list, and likewise for the reverse primers.  The
 * emptied reactions are then freed.  Nothing is done when there are fewer
 * than two reactions.  The data argument is not used.
 */
static void MergePCRPrimersCallback (BioSourcePtr biop, Pointer data)
{
  PCRReactionPtr ps, ps_next;
  PCRPrimerPtr   pp_f_last, pp_r_last;

  if (biop == NULL || biop->pcr_primers == NULL || biop->pcr_primers->next == NULL) {
    return;
  }

  /* find the current tails of the first reaction's primer lists
   * (NULL when the corresponding list is empty)
   */
  pp_f_last = biop->pcr_primers->forward;
  if (pp_f_last != NULL) {
    while (pp_f_last->next != NULL) {
      pp_f_last = pp_f_last->next;
    }
  }
  pp_r_last = biop->pcr_primers->reverse;
  if (pp_r_last != NULL) {
    while (pp_r_last->next != NULL) {
      pp_r_last = pp_r_last->next;
    }
  }

  /* detach the remaining reactions from the first one */
  ps = biop->pcr_primers->next;
  biop->pcr_primers->next = NULL;

  while (ps != NULL) {
    ps_next = ps->next;
    ps->next = NULL;
    if (ps->forward != NULL) {
      if (pp_f_last == NULL) {
        biop->pcr_primers->forward = ps->forward;
      } else {
        pp_f_last->next = ps->forward;
      }
      /* always move the tail to the end of what was just attached; leaving
       * it NULL after filling an empty list made the next reaction overwrite
       * (and lose) these primers
       */
      pp_f_last = ps->forward;
      while (pp_f_last->next != NULL) {
        pp_f_last = pp_f_last->next;
      }
      /* ownership moved to the first reaction; keep the free below from
       * touching the primers
       */
      ps->forward = NULL;
    }
    if (ps->reverse != NULL) {
      if (pp_r_last == NULL) {
        biop->pcr_primers->reverse = ps->reverse;
      } else {
        pp_r_last->next = ps->reverse;
      }
      pp_r_last = ps->reverse;
      while (pp_r_last->next != NULL) {
        pp_r_last = pp_r_last->next;
      }
      ps->reverse = NULL;
    }
    ps = PCRReactionFree (ps);
    ps = ps_next;
  }
}
38639 
38640 
MergePCRPrimers(SeqEntryPtr sep)38641 NLM_EXTERN void MergePCRPrimers (SeqEntryPtr sep)
38642 {
38643   VisitBioSourcesInSep (sep, NULL, MergePCRPrimersCallback);
38644 }
38645 
38646 
/* Removes from *pp_list every primer whose name matches scp and returns the
 * removed primers as a new list, in their original relative order.  Primers
 * that do not match stay in *pp_list, also in order; *pp_list is updated
 * when its head is removed.  Returns NULL when pp_list or *pp_list is NULL
 * or nothing matches.
 */
static PCRPrimerPtr ExtractPrimersByConstraint (PCRPrimerPtr PNTR pp_list, StringConstraintPtr scp)
{
  PCRPrimerPtr      extracted = NULL;
  PCRPrimerPtr PNTR src_link;               /* link that currently points at cur */
  PCRPrimerPtr PNTR dst_link = &extracted;  /* where the next match is attached */
  PCRPrimerPtr      cur;

  if (pp_list == NULL || *pp_list == NULL) {
    return NULL;
  }

  src_link = pp_list;
  while ((cur = *src_link) != NULL) {
    if (DoesStringMatchConstraint(cur->name, scp)) {
      /* unlink from the source list; src_link now refers to the successor */
      *src_link = cur->next;
      cur->next = NULL;
      *dst_link = cur;
      dst_link = &(cur->next);
    } else {
      src_link = &(cur->next);
    }
  }
  return extracted;
}
38678 
38679 
/* Two string constraints bundled so they can travel through a single
 * callback user-data pointer.  SplitPCRPrimersByConstraints fills scp1 with
 * the forward-primer constraint and scp2 with the reverse-primer constraint.
 */
typedef struct stringconstraintpair {
  StringConstraintPtr scp1;   /* applied to forward primer names */
  StringConstraintPtr scp2;   /* applied to reverse primer names */
} StringConstraintPairData, PNTR StringConstraintPairPtr;
38684 
/* BioSource visitor: pulls the primers that match a pair of name
 * constraints out of every reaction and gathers them in one new reaction.
 *
 * data must point to a StringConstraintPairData; scp1 is applied to forward
 * primers and scp2 to reverse primers.  Matching primers are removed from
 * their reactions and collected, in encounter order, in a new reaction that
 * is appended after the last existing reaction.  When nothing matches, the
 * new reaction is freed again and the list is left as it was.
 */
static void SplitPCRPrimersByConstraintsCallback (BioSourcePtr biop, Pointer data)
{
  StringConstraintPairPtr constraints = (StringConstraintPairPtr) data;
  PCRReactionPtr    reaction, collected, final_reaction = NULL;
  PCRPrimerPtr      matched;
  PCRPrimerPtr PNTR fwd_link;   /* where the next forward match is attached */
  PCRPrimerPtr PNTR rev_link;   /* where the next reverse match is attached */

  if (biop == NULL || biop->pcr_primers == NULL || constraints == NULL) {
    return;
  }

  collected = PCRReactionNew ();
  fwd_link = &(collected->forward);
  rev_link = &(collected->reverse);

  for (reaction = biop->pcr_primers; reaction != NULL; reaction = reaction->next) {
    /* take forward matches */
    matched = ExtractPrimersByConstraint (&(reaction->forward), constraints->scp1);
    if (matched != NULL) {
      *fwd_link = matched;
      while (*fwd_link != NULL) {
        fwd_link = &((*fwd_link)->next);
      }
    }
    /* take reverse matches */
    matched = ExtractPrimersByConstraint (&(reaction->reverse), constraints->scp2);
    if (matched != NULL) {
      *rev_link = matched;
      while (*rev_link != NULL) {
        rev_link = &((*rev_link)->next);
      }
    }
    final_reaction = reaction;
  }

  if (collected->forward == NULL && collected->reverse == NULL) {
    collected = PCRReactionFree (collected);
  } else {
    final_reaction->next = collected;
  }
}
38732 
38733 
SplitPCRPrimersByConstraints(SeqEntryPtr sep,StringConstraintPtr scp_fwd,StringConstraintPtr scp_rev)38734 NLM_EXTERN void SplitPCRPrimersByConstraints (SeqEntryPtr sep, StringConstraintPtr scp_fwd, StringConstraintPtr scp_rev)
38735 {
38736   StringConstraintPairData pair;
38737 
38738   pair.scp1 = scp_fwd;
38739   pair.scp2 = scp_rev;
38740 
38741   VisitBioSourcesInSep (sep, &pair, SplitPCRPrimersByConstraintsCallback);
38742 }
38743 
38744 
38745 /* product name fixing rules */
38746 
CountSuspectRuleSet(SuspectRuleSetPtr set)38747 NLM_EXTERN Int4 CountSuspectRuleSet (SuspectRuleSetPtr set)
38748 {
38749   Int4 num = 0;
38750   while (set != NULL) {
38751     num++;
38752     set = set->next;
38753   }
38754   return num;
38755 }
38756 
38757 
38758 /* emptiness */
IsSearchFuncEmpty(SearchFuncPtr func)38759 NLM_EXTERN Boolean IsSearchFuncEmpty (SearchFuncPtr func)
38760 {
38761   Boolean rval = TRUE;
38762 
38763   if (func == NULL) {
38764     rval = TRUE;
38765   } else {
38766     switch (func->choice) {
38767       case SearchFunc_string_constraint:
38768         rval = IsStringConstraintEmpty (func->data.ptrvalue);
38769         break;
38770       case SearchFunc_prefix_and_numbers:
38771         rval = StringHasNoText (func->data.ptrvalue);
38772         break;
38773       default:
38774         rval = FALSE;
38775     }
38776   }
38777   return rval;
38778 }
38779 
38780 
IsSuspectRuleEmpty(SuspectRulePtr rule)38781 NLM_EXTERN Boolean IsSuspectRuleEmpty (SuspectRulePtr rule)
38782 {
38783   if (rule == NULL) {
38784     return TRUE;
38785   } else if (IsSearchFuncEmpty(rule->find)) {
38786     return TRUE;
38787   } else {
38788     return FALSE;
38789   }
38790 }
38791 
38792 
38793 /* summarization */
SummarizeSearchFunc(SearchFuncPtr func,Boolean short_version)38794 NLM_EXTERN CharPtr SummarizeSearchFunc (SearchFuncPtr func, Boolean short_version)
38795 {
38796   CharPtr summ = NULL;
38797   CharPtr bracket_fmt = "Contains %d or more brackets or parentheses";
38798   CharPtr prefix_fmt = "Is '%s' followed by numbers";
38799   CharPtr length_fmt = "Is longer than %d characters";
38800   CharPtr term_fmt = "Contains '%s' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'";
38801   CharPtr short_term_fmt = "Contains '%s'";
38802 
38803   if (func == NULL) {
38804     summ = StringSave ("No search function");
38805   } else {
38806     switch (func->choice) {
38807       case SearchFunc_string_constraint:
38808         summ = SummarizeStringConstraintEx (func->data.ptrvalue, short_version);
38809         break;
38810       case SearchFunc_contains_plural:
38811         summ = StringSave ("May contain plural");
38812         break;
38813       case SearchFunc_n_or_more_brackets_or_parentheses:
38814         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (bracket_fmt) + 15));
38815         sprintf (summ, bracket_fmt, func->data.intvalue);
38816         break;
38817       case SearchFunc_three_numbers:
38818         summ = StringSave ("Three or more numbers together");
38819         break;
38820       case SearchFunc_underscore:
38821         summ = StringSave ("Contains underscore");
38822         break;
38823       case SearchFunc_prefix_and_numbers:
38824         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (prefix_fmt) + StringLen (func->data.ptrvalue)));
38825         sprintf (summ, prefix_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue);
38826         break;
38827       case SearchFunc_all_caps:
38828         summ = StringSave ("Is all capital letters");
38829         break;
38830       case SearchFunc_unbalanced_paren:
38831         summ = StringSave ("Contains unbalanced brackets or parentheses");
38832         break;
38833       case SearchFunc_too_long:
38834         summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (length_fmt) + 15));
38835         sprintf (summ, length_fmt, func->data.intvalue);
38836         break;
38837       case SearchFunc_has_term:
38838         if (short_version) {
38839           summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (short_term_fmt) + StringLen (func->data.ptrvalue)));
38840           sprintf (summ, short_term_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue);
38841         } else {
38842           summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (term_fmt) + StringLen (func->data.ptrvalue)));
38843           sprintf (summ, term_fmt, func->data.ptrvalue == NULL ? "" : func->data.ptrvalue);
38844         }
38845         break;
38846       default:
38847         summ = StringSave ("Unknown search function");
38848         break;
38849     }
38850   }
38851   return summ;
38852 }
38853 
38854 
SummarizeReplaceFunc(ReplaceFuncPtr replace,Boolean short_version)38855 NLM_EXTERN CharPtr SummarizeReplaceFunc (ReplaceFuncPtr replace, Boolean short_version)
38856 {
38857   CharPtr summ = NULL;
38858   SimpleReplacePtr simple;
38859   CharPtr replace_fmt = "Replace %swith '%s'";
38860   CharPtr whole = "entire name ";
38861   CharPtr weasel_to_putative = ", retain and normalize 'putative' synonym";
38862   CharPtr haem_fmt = "Replace '%s' with 'heme' if whole word, 'hem' otherwise";
38863   Int4 len;
38864 
38865   if (replace == NULL) {
38866     return NULL;
38867   }
38868   switch (replace->choice) {
38869     case ReplaceFunc_simple_replace:
38870       simple = (SimpleReplacePtr) replace->data.ptrvalue;
38871       len = StringLen (replace_fmt) + StringLen (simple->replace) + 1;
38872       if (simple->whole_string) {
38873         len += StringLen (whole);
38874       }
38875       if (simple->weasel_to_putative && !short_version) {
38876         len += StringLen (weasel_to_putative);
38877       }
38878       summ = (CharPtr) MemNew (sizeof (Char) * len);
38879       sprintf (summ, replace_fmt,
38880                simple->whole_string ? whole : "" ,
38881                simple->replace == NULL ? "" : simple->replace);
38882       if (simple->weasel_to_putative && !short_version) {
38883         StringCat (summ, weasel_to_putative);
38884       }
38885       break;
38886     case ReplaceFunc_haem_replace:
38887       summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (haem_fmt) + StringLen (replace->data.ptrvalue)));
38888       sprintf (summ, haem_fmt, replace->data.ptrvalue);
38889       break;
38890     default:
38891       summ = StringSave ("Unknown replacement function");
38892       break;
38893   }
38894   return summ;
38895 }
38896 
/* Display text for each fix type, indexed by the numeric fix type value;
 * SummarizeFixType returns the entry at that index and falls back to
 * "Unknown fix type" for values past the end of this table.
 * NOTE(review): the order presumably mirrors the Fix_type_* constants -
 * confirm against their definition before adding or reordering entries.
 */
static CharPtr fix_type_names[] = {
  "None",
  "Typo",
  "Putative Typo",
  "Quick fix",
  "Organelles not appropriate in prokaryote",
  "Suspicious phrase; should this be nonfunctional?",
  "May contain database identifier more appropriate in note; remove from product name",
  "Remove organism from product name",
  "Possible parsing error or incorrect formatting; remove inappropriate symbols",
  "Implies evolutionary relationship; change to -like protein",
  "Consider adding 'protein' to the end of the product name",
  "Correct the name or use 'hypothetical protein'",
  "Use American spelling",
  "Use short product name instead of descriptive phrase",
  "use protein instead of gene as appropriate"
};
38914 
SummarizeFixType(Uint2 fix_type)38915 NLM_EXTERN CharPtr SummarizeFixType (Uint2 fix_type)
38916 {
38917   if (fix_type < sizeof (fix_type_names) / sizeof (CharPtr)) {
38918     return fix_type_names[fix_type];
38919   } else {
38920     return "Unknown fix type";
38921   }
38922 }
38923 
38924 
SummarizeReplaceRule(ReplaceRulePtr replace,Boolean short_version)38925 NLM_EXTERN CharPtr SummarizeReplaceRule (ReplaceRulePtr replace, Boolean short_version)
38926 {
38927   CharPtr add_note = ", move original to note";
38928   CharPtr func;
38929   CharPtr summ = NULL;
38930   Int4 len;
38931 
38932   if (replace == NULL) {
38933     return NULL;
38934   }
38935   func = SummarizeReplaceFunc (replace->replace_func, short_version);
38936   len = StringLen (func) + 1;
38937   if (replace->move_to_note) {
38938     len += StringLen (add_note);
38939   }
38940   summ = (CharPtr) MemNew (sizeof (Char) * len);
38941   StringCpy (summ, func);
38942   if (replace->move_to_note) {
38943     StringCat (summ, add_note);
38944   }
38945 
38946   func = MemFree (func);
38947   return summ;
38948 }
38949 
38950 
SummarizeSuspectRuleEx(SuspectRulePtr rule,Boolean short_version)38951 NLM_EXTERN CharPtr SummarizeSuspectRuleEx (SuspectRulePtr rule, Boolean short_version)
38952 {
38953   CharPtr find = NULL, replace = NULL, fix_type = NULL, feat_constraint = NULL, except = NULL;
38954   CharPtr summ = NULL;
38955   CharPtr tmp = NULL;
38956   CharPtr butnot = " but not ";
38957   CharPtr desc = " Description: ";
38958   CharPtr fatal = "(FATAL)";
38959   Int4 len;
38960 
38961   if (rule == NULL) {
38962     return NULL;
38963   }
38964 
38965   if (!short_version && rule->rule_type != Fix_type_none) {
38966     fix_type = SummarizeFixType (rule->rule_type);
38967   }
38968 
38969   if (short_version && !StringHasNoText (rule->description)) {
38970     if (fix_type == NULL) {
38971       summ = StringSave (rule->description);
38972     } else {
38973       len = StringLen (fix_type) + StringLen (rule->description) + 4;
38974       summ = (CharPtr) MemNew (sizeof (Char) * len);
38975       StringCpy (summ, rule->description);
38976       StringCat (summ, " (");
38977       StringCat (summ, fix_type);
38978       StringCat (summ, ")");
38979     }
38980     if (rule->fatal) {
38981       len = StringLen (summ) + StringLen (fatal) + 1;
38982       tmp = (CharPtr) MemNew (sizeof (Char) * len);
38983       StringCpy (tmp, summ);
38984       StringCat (tmp, fatal);
38985       summ = (CharPtr) MemFree (summ);
38986       summ = tmp;
38987     }
38988     return summ;
38989   }
38990 
38991 
38992   find = SummarizeSearchFunc (rule->find, short_version);
38993   if (!IsSearchFuncEmpty(rule->except)) {
38994     except = SummarizeSearchFunc (rule->except, short_version);
38995   }
38996 
38997   if (!short_version) {
38998     feat_constraint = SummarizeConstraintSet (rule->feat_constraint);
38999   }
39000 
39001   if (!short_version || rule->rule_type == Fix_type_typo) {
39002     replace = SummarizeReplaceRule (rule->replace, short_version);
39003   }
39004 
39005   len = StringLen (find) + StringLen (except) + StringLen (feat_constraint)
39006              + StringLen (replace) + 3;
39007   if (fix_type != NULL) {
39008     len = len + StringLen (fix_type) + 3;
39009   }
39010   if (!StringHasNoText (rule->description)) {
39011     len += StringLen (rule->description) + StringLen (desc);
39012   }
39013   if (feat_constraint != NULL) {
39014     len += 2;
39015   }
39016   if (except != NULL) {
39017     len += StringLen (butnot);
39018   }
39019   if (rule->fatal) {
39020     len += StringLen(fatal);
39021   }
39022   summ = (CharPtr) MemNew (sizeof (Char) * len);
39023   StringCpy (summ, find);
39024   if (except != NULL) {
39025     StringCat (summ, butnot);
39026     StringCat (summ, except);
39027   }
39028 
39029   if (feat_constraint != NULL) {
39030     StringCat (summ, ", ");
39031     StringCat (summ, feat_constraint);
39032   }
39033 
39034   if (replace != NULL) {
39035     StringCat (summ, ", ");
39036     StringCat (summ, replace);
39037   }
39038 
39039   if (fix_type != NULL) {
39040     StringCat (summ, " (");
39041     StringCat (summ, fix_type);
39042     StringCat (summ, ")");
39043   }
39044 
39045   if (!StringHasNoText (rule->description)) {
39046     StringCat (summ, desc);
39047     StringCat (summ, rule->description);
39048   }
39049   if (rule->fatal) {
39050     StringCat (summ, fatal);
39051   }
39052 
39053   find = MemFree (find);
39054   except = MemFree (except);
39055   feat_constraint = MemFree (feat_constraint);
39056   replace = MemFree (replace);
39057   return summ;
39058 }
39059 
39060 
SummarizeSuspectRule(SuspectRulePtr rule)39061 NLM_EXTERN CharPtr SummarizeSuspectRule (SuspectRulePtr rule)
39062 {
39063   return SummarizeSuspectRuleEx (rule, FALSE);
39064 }
39065 
39066 
StringMayContainPlural(CharPtr search)39067 NLM_EXTERN Boolean StringMayContainPlural (CharPtr search)
39068 {
39069   CharPtr cp;
39070   Char    last_letter, second_to_last_letter, next_letter;
39071   Int4    word_len = 0;
39072   Boolean may_contain_plural = FALSE;
39073   CharPtr word_skip = " ,";
39074 
39075   if (search == NULL) return FALSE;
39076   cp = search;
39077   while (*cp != 0 && !may_contain_plural) {
39078     word_len = StringCSpn (cp, word_skip);
39079     last_letter = *(cp + word_len - 1);
39080     if (last_letter == 's') {
39081       if (word_len >=5 && StringNCmp (cp + word_len - 5, "trans", 5) == 0) {
39082         /* not plural */
39083         cp = cp + word_len;
39084         cp += StringSpn (cp, word_skip);
39085       } else if (word_len > 3
39086                  && (second_to_last_letter = *(cp + word_len - 2)) != 's'
39087                  && second_to_last_letter != 'i'
39088                  && second_to_last_letter != 'u'
39089                  && ((next_letter = *(cp + word_len)) == ',' || next_letter == 0)) {
39090         may_contain_plural = TRUE;
39091       } else {
39092         cp = cp + word_len;
39093         cp += StringSpn (cp, word_skip);
39094       }
39095     } else {
39096       cp = cp + word_len;
39097       cp += StringSpn (cp, word_skip);
39098     }
39099   }
39100   return may_contain_plural;
39101 }
39102 
39103 
/* Returns a pointer to the first '(' or '[' in cp, whichever comes first,
 * or NULL when cp is NULL or contains neither character.
 */
static CharPtr FindFirstOpen (CharPtr cp)
{
  CharPtr paren, bracket;

  if (cp == NULL) {
    return NULL;
  }

  paren = StringChr (cp, '(');
  bracket = StringChr (cp, '[');

  /* the parenthesis wins only when it exists and precedes any bracket */
  if (paren != NULL && (bracket == NULL || bracket > paren)) {
    return paren;
  }
  return bracket;
}
39121 
39122 
GetClose(Char ch)39123 static Char GetClose (Char ch)
39124 {
39125   if (ch == '(') {
39126     return ')';
39127   } else if (ch == '[') {
39128     return ']';
39129   } else if (ch == '{') {
39130     return '}';
39131   } else {
39132     return ch;
39133   }
39134 }
39135 
39136 
SkipBracketOrParen(CharPtr bp,CharPtr start,CharPtr PNTR skip_to)39137 static Boolean SkipBracketOrParen (CharPtr bp, CharPtr start, CharPtr PNTR skip_to)
39138 {
39139   Boolean rval = FALSE;
39140   CharPtr ep, ns;
39141 
39142   if (bp - start > 2 && StringNCmp (bp - 3, "NAD(P)", 6) == 0) {
39143     rval = TRUE;
39144     *skip_to = bp + 6;
39145   } else if (StringNCmp (bp, "(NAD(P)H)", 9) == 0) {
39146     rval = TRUE;
39147     *skip_to = bp + 9;
39148   } else if (StringNCmp (bp, "(NAD(P))", 8) == 0) {
39149     rval = TRUE;
39150     *skip_to = bp + 8;
39151   } else if (StringNCmp (bp, "(I)", 3) == 0) {
39152     rval = TRUE;
39153     *skip_to = bp + 4;
39154   } else if (StringNCmp (bp, "(II)", 4) == 0) {
39155     rval = TRUE;
39156     *skip_to = bp + 5;
39157   } else if (StringNCmp (bp, "(III)", 5) == 0) {
39158     rval = TRUE;
39159     *skip_to = bp + 6;
39160   } else if (StringNCmp (bp, "(NADPH)", 7) == 0) {
39161     rval = TRUE;
39162     *skip_to = bp + 7;
39163   } else if (StringNCmp (bp, "(NAD+)", 6) == 0) {
39164     rval = TRUE;
39165     *skip_to = bp + 6;
39166   } else if (StringNCmp (bp, "(NAPPH/NADH)", 12) == 0) {
39167     rval = TRUE;
39168     *skip_to = bp + 12;
39169   } else if (StringNCmp (bp, "(NADP+)", 7) == 0) {
39170     rval = TRUE;
39171     *skip_to = bp + 7;
39172   } else if (StringNCmp (bp, "[acyl-carrier protein]", 22) == 0) {
39173     rval = TRUE;
39174     *skip_to = bp + 22;
39175   } else if (StringNCmp (bp, "[acyl-carrier-protein]", 22) == 0) {
39176     rval = TRUE;
39177     *skip_to = bp + 22;
39178   } else if (StringNCmp (bp, "(acyl carrier protein)", 22) == 0) {
39179     rval = TRUE;
39180     *skip_to = bp + 22;
39181   } else {
39182     ns = StringChr (bp + 1, *bp);
39183     ep = StringChr (bp + 1, GetClose(*bp));
39184     if (ep != NULL && (ns == NULL || ns > ep)) {
39185       if (ep - bp < 5) {
39186         rval = TRUE;
39187         *skip_to = ep + 1;
39188       } else if (ep - bp > 3 && StringNCmp (ep - 3, "ing", 3) == 0) {
39189         rval = TRUE;
39190         *skip_to = ep + 1;
39191       }
39192     }
39193   }
39194   return rval;
39195 }
39196 
39197 
ContainsNorMoreSetsOfBracketsOrParentheses(CharPtr search,Int4 n)39198 NLM_EXTERN Boolean ContainsNorMoreSetsOfBracketsOrParentheses (CharPtr search, Int4 n)
39199 {
39200   CharPtr cp, end;
39201   Int4    num_found = 0;
39202 
39203   if (search == NULL) {
39204     return FALSE;
39205   }
39206 
39207   cp = FindFirstOpen(search);
39208   while (num_found < n && cp != NULL && *cp != 0) {
39209     if (SkipBracketOrParen(cp, search, &cp)) {
39210       /* ignore it */
39211       cp = FindFirstOpen (cp);
39212     } else if ((end = StringChr (cp, GetClose (*cp))) == NULL) {
39213       /* skip, doesn't close the bracket */
39214       cp = FindFirstOpen (cp + 1);
39215     } else {
39216       cp = FindFirstOpen (end);
39217       num_found ++;
39218     }
39219   }
39220 
39221   if (num_found >= n) {
39222     return TRUE;
39223   } else {
39224     return FALSE;
39225   }
39226 }
39227 
39228 
FollowedByFamily(CharPtr PNTR str)39229 static Boolean FollowedByFamily (CharPtr PNTR str)
39230 {
39231   Int4 word_len;
39232 
39233   if (str == NULL || *str == NULL || **str == 0) {
39234     return FALSE;
39235   }
39236 
39237   word_len = StringCSpn (*str + 1, " ");
39238   if (*(*str + word_len + 1) != 0 && StringNCmp (*str + word_len + 2, "family", 6) == 0) {
39239     *str = *str + word_len + 7;
39240     return TRUE;
39241   } else {
39242     return FALSE;
39243   }
39244 }
39245 
39246 
InWordBeforeCytochromeOrCoenzyme(CharPtr cp,CharPtr start)39247 static Boolean InWordBeforeCytochromeOrCoenzyme (CharPtr cp, CharPtr start)
39248 {
39249   if (cp == NULL) {
39250     return FALSE;
39251   }
39252 
39253   while (cp > start && !isspace (*cp)) {
39254     cp--;
39255   }
39256   if (cp == start) {
39257     return FALSE;
39258   }
39259   while (cp > start && isspace (*cp)) {
39260     cp--;
39261   }
39262   if (cp - start >= 9 && StringNICmp (cp - 9, "cytochrome", 10) == 0) {
39263     return TRUE;
39264   } else if (cp - start >= 7 && StringNCmp (cp - 7, "coenzyme", 8) == 0) {
39265     return TRUE;
39266   } else {
39267     return FALSE;
39268   }
39269 }
39270 
39271 
PrecededByPrefix(CharPtr search,CharPtr cp,CharPtr prefix)39272 static Boolean PrecededByPrefix (CharPtr search, CharPtr cp, CharPtr prefix)
39273 {
39274   Int4 len;
39275 
39276   if (search == NULL || cp == NULL || StringHasNoText (prefix)) {
39277     return FALSE;
39278   }
39279   len = StringLen (prefix);
39280   if (cp - search >= len && StringNCmp (cp - len, prefix, len) == 0) {
39281     return TRUE;
39282   } else {
39283     return FALSE;
39284   }
39285 }
39286 
39287 
39288 static CharPtr OkNumberPrefix[] = {"DUF", "UPF", "IS", "TIGR", "UCP", "PUF", "CHP", NULL };
PrecededByOkPrefix(CharPtr search,CharPtr p)39289 static Boolean PrecededByOkPrefix (CharPtr search, CharPtr p)
39290 {
39291   Int4 i;
39292   Boolean rval = FALSE;
39293 
39294   for (i = 0; OkNumberPrefix[i] != NULL && !rval; i++) {
39295     if (PrecededByPrefix (search, p, OkNumberPrefix[i])) {
39296       rval = TRUE;
39297     }
39298   }
39299   return rval;
39300 }
39301 
39302 
ContainsThreeOrMoreNumbersTogether(CharPtr search)39303 NLM_EXTERN Boolean ContainsThreeOrMoreNumbersTogether (CharPtr search)
39304 {
39305   CharPtr p;
39306   Int4 num_digits = 0;
39307 
39308   if (search == NULL) {
39309     return FALSE;
39310   }
39311 
39312   p = search;
39313   while (*p != 0) {
39314     if (isdigit (*p)) {
39315       if (PrecededByOkPrefix(search, p)) {
39316         p += StrSpn (p, "0123456789") - 1;
39317         num_digits = 0;
39318       } else if (InWordBeforeCytochromeOrCoenzyme (p, search)) {
39319         p += StrSpn (p, "0123456789") - 1;
39320         num_digits = 0;
39321       } else {
39322         num_digits ++;
39323         if (num_digits == 3) {
39324           if (FollowedByFamily (&p)) {
39325             num_digits = 0;
39326           } else {
39327             return TRUE;
39328           }
39329         }
39330       }
39331     } else {
39332       num_digits = 0;
39333     }
39334     p++;
39335   }
39336   return FALSE;
39337 }
39338 
39339 
StringContainsUnderscore(CharPtr search)39340 NLM_EXTERN Boolean StringContainsUnderscore (CharPtr search)
39341 {
39342   CharPtr cp;
39343 
39344   if (search == NULL) {
39345     return FALSE;
39346   }
39347 
39348   cp = StringChr (search, '_');
39349   while (cp != NULL) {
39350     if (FollowedByFamily (&cp)) {
39351       /* search again */
39352       cp = StringChr (cp, '_');
39353     } else if (cp - search < 3 || *(cp + 1) == 0) {
39354       return TRUE;
39355     } else if ((StringNCmp (cp - 3, "MFS", 3) == 0
39356                 || StringNCmp (cp - 3, "TPR", 3) == 0
39357                 || StringNCmp (cp - 3, "AAA", 3) == 0)
39358                 && isdigit (*(cp + 1)) && !isdigit (*(cp + 2))) {
39359       cp = StringChr (cp + 1, '_');
39360     } else {
39361       return TRUE;
39362     }
39363   }
39364   return FALSE;
39365 }
39366 
39367 
ProductContainsTerm(CharPtr pattern,CharPtr search)39368 NLM_EXTERN Boolean ProductContainsTerm (CharPtr pattern, CharPtr search)
39369 {
39370   CharPtr str;
39371 
39372   /* don't bother searching for c-term or n-term if product name contains "domain" */
39373   if (StringISearch (search, "domain") != NULL) {
39374     return FALSE;
39375   }
39376 
39377   str = StringISearch(search, pattern);
39378   /* c-term and n-term must be either first word or separated from other word by space, num, or punct */
39379   if (str != NULL && (str == search || !isalpha (*(str - 1)))) {
39380     return TRUE;
39381   } else {
39382     return FALSE;
39383   }
39384 }
39385 
39386 
IsPrefixPlusNumbers(CharPtr prefix,CharPtr search)39387 NLM_EXTERN Boolean IsPrefixPlusNumbers (CharPtr prefix, CharPtr search)
39388 {
39389   Int4 pattern_len, digit_len;
39390 
39391   if (search == NULL) {
39392     return FALSE;
39393   }
39394   pattern_len = StringLen (prefix);
39395   if (pattern_len > 0 && StringNCmp (search, prefix, pattern_len) != 0) {
39396     return FALSE;
39397   }
39398 
39399   digit_len = StringSpn (search + pattern_len, "1234567890");
39400   if (digit_len > 0 && *(search + pattern_len + digit_len) == 0) {
39401     return TRUE;
39402   } else {
39403     return FALSE;
39404   }
39405 }
39406 
39407 
StringContainsUnbalancedParentheses(CharPtr search)39408 NLM_EXTERN Boolean StringContainsUnbalancedParentheses (CharPtr search)
39409 {
39410   CharPtr buffer, cp_src;
39411   Int4    pos = 0;
39412   Boolean is_bad = FALSE;
39413 
39414   if (search == NULL) {
39415     return FALSE;
39416   }
39417 
39418   /* note - don't need space for terminating character */
39419   buffer = MemNew (sizeof (Char) * StringLen (search));
39420   cp_src = search;
39421   while (*cp_src != 0 && !is_bad) {
39422     if (*cp_src == '(' || *cp_src == '[') {
39423       buffer[pos++] = *cp_src;
39424     } else if (*cp_src == ')') {
39425       if (pos < 1) {
39426         is_bad = TRUE;
39427       } else if (buffer[pos - 1] != '(') {
39428         is_bad = TRUE;
39429       } else {
39430         pos --;
39431       }
39432     } else if (*cp_src == ']') {
39433       if (pos < 1) {
39434         is_bad = TRUE;
39435       } else if (buffer[pos - 1] != '[') {
39436         is_bad = TRUE;
39437       } else {
39438         pos--;
39439       }
39440     }
39441     ++cp_src;
39442   }
39443 
39444   if (pos > 0) {
39445     is_bad = TRUE;
39446   }
39447   buffer = MemFree (buffer);
39448   return is_bad;
39449 }
39450 
39451 
MatchesSearchFunc(CharPtr str,SearchFuncPtr search)39452 static Boolean MatchesSearchFunc (CharPtr str, SearchFuncPtr search)
39453 {
39454   Boolean rval = FALSE;
39455 
39456   if (str == NULL) {
39457     return FALSE;
39458   } else if (search == NULL) {
39459     return TRUE;
39460   }
39461 
39462   switch (search->choice) {
39463     case SearchFunc_string_constraint:
39464       rval = DoesStringMatchConstraint(str, (StringConstraintPtr) search->data.ptrvalue);
39465       break;
39466     case SearchFunc_contains_plural:
39467       rval = StringMayContainPlural (str);
39468       break;
39469     case SearchFunc_n_or_more_brackets_or_parentheses:
39470       rval = ContainsNorMoreSetsOfBracketsOrParentheses (str, search->data.intvalue);
39471       break;
39472     case SearchFunc_three_numbers:
39473       rval = ContainsThreeOrMoreNumbersTogether (str);
39474       break;
39475     case SearchFunc_underscore:
39476       rval = StringContainsUnderscore (str);
39477       break;
39478     case SearchFunc_prefix_and_numbers:
39479       rval = IsPrefixPlusNumbers (search->data.ptrvalue, str);
39480       break;
39481     case SearchFunc_all_caps:
39482       rval = IsAllCaps (str);
39483       break;
39484     case SearchFunc_unbalanced_paren:
39485       rval = StringContainsUnbalancedParentheses (str);
39486       break;
39487     case SearchFunc_too_long:
39488       if (StringISearch (str, "bifunctional") == NULL && StringISearch (str, "multifunctional") == NULL
39489           && StringLen (str) > (Uint4) search->data.intvalue) {
39490         rval = TRUE;
39491       }
39492       break;
39493     case SearchFunc_has_term:
39494       rval = ProductContainsTerm (search->data.ptrvalue, str);
39495       break;
39496   }
39497   return rval;
39498 }
39499 
39500 
MatchesSuspectProductRule(CharPtr str,SuspectRulePtr rule)39501 static Boolean MatchesSuspectProductRule (CharPtr str, SuspectRulePtr rule)
39502 {
39503   if (str == NULL) {
39504     return FALSE;
39505   } else if (rule == NULL) {
39506     return TRUE;
39507   }
39508 
39509   if (!IsSearchFuncEmpty(rule->find) && !MatchesSearchFunc(str, rule->find)) {
39510     return FALSE;
39511   } else if (!IsSearchFuncEmpty(rule->except) && MatchesSearchFunc (str, rule->except)) {
39512     return FALSE;
39513   } else {
39514     return TRUE;
39515   }
39516 }
39517 
39518 
/* State handed to SuspectRuleFeatCallback while features are visited. */
typedef struct suspectrulecallback {
  SuspectRuleSetPtr rules;      /* rules every visited feature is tested against */
  ValNodePtr        obj_lists;  /* one node per rule, in rule order; data.ptrvalue holds that rule's list of matching features */
  Uint2             featdef;    /* only features whose idx.subtype equals this value are examined */
} SuspectRuleCallbackData, PNTR SuspectRuleCallbackPtr;
39524 
39525 
DoesStringMatchSuspectRule(CharPtr str,SeqFeatPtr sfp,SuspectRulePtr rule)39526 NLM_EXTERN Boolean DoesStringMatchSuspectRule (CharPtr str, SeqFeatPtr sfp, SuspectRulePtr rule)
39527 {
39528   BioseqPtr bsp;
39529   SeqFeatPtr cds;
39530   Boolean rval = FALSE;
39531 
39532   if (rule == NULL) {
39533     return TRUE;
39534   }
39535 
39536   if (MatchesSuspectProductRule(str, rule)) {
39537     /* we want to list the coding region, rather than the protein feature, if we can */
39538     if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
39539       bsp = BioseqFindFromSeqLoc (sfp->location);
39540       if (bsp != NULL) {
39541         cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
39542         if (cds != NULL) {
39543           sfp = cds;
39544         }
39545       }
39546     }
39547     if (sfp == NULL) {
39548       if (rule->feat_constraint == NULL) {
39549         rval = TRUE;
39550       }
39551     } else if (DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, rule->feat_constraint)) {
39552       rval = TRUE;
39553     }
39554   }
39555   return rval;
39556 }
39557 
39558 
/* Feature visitor: tests one feature against every rule in the callback
 * data and records it in the per-rule object list of each rule it matches.
 *
 * sfp  - feature being visited; ignored unless its subtype equals the
 *        featdef stored in the callback data.
 * data - SuspectRuleCallbackPtr carrying the rules and the result lists.
 *
 * For protein features the first protein name is checked and the coding
 * region (when found) is recorded instead of the protein feature.  For rRNA
 * features the RNA product string is checked.
 */
static void SuspectRuleFeatCallback (SeqFeatPtr sfp, Pointer data)
{
  SuspectRuleCallbackPtr cb = (SuspectRuleCallbackPtr) data;
  SuspectRulePtr rule;
  ProtRefPtr     prot;
  BioseqPtr      prot_bsp;
  SeqFeatPtr     coding;
  SeqFeatPtr     report = sfp;
  ValNodePtr     slot, hits;
  CharPtr        product = NULL;

  if (sfp == NULL || cb == NULL || sfp->idx.subtype != cb->featdef) {
    return;
  }

  if (cb->featdef == FEATDEF_PROT) {
    prot = (ProtRefPtr) sfp->data.value.ptrvalue;
    if (prot == NULL || prot->name == NULL) {
      return;
    }
    product = prot->name->data.ptrvalue;
    /* we want to list the coding region, rather than the protein feature, if we can */
    prot_bsp = BioseqFindFromSeqLoc (sfp->location);
    if (prot_bsp != NULL) {
      coding = SeqMgrGetCDSgivenProduct (prot_bsp, NULL);
      if (coding != NULL) {
        report = coding;
      }
    }
  } else if (cb->featdef == FEATDEF_rRNA) {
    /* NOTE(review): if GetRNAProductString returns an allocated copy, it is
     * never released here - confirm ownership against its definition.
     */
    product = GetRNAProductString (sfp, NULL);
  }

  slot = cb->obj_lists;
  for (rule = cb->rules; rule != NULL; rule = rule->next) {
    /* make sure we have space in the object lists */
    if (slot == NULL) {
      slot = ValNodeNew (cb->obj_lists);
      if (cb->obj_lists == NULL) {
        cb->obj_lists = slot;
      }
    }

    if (MatchesSuspectProductRule (product, rule)
        && DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, report, rule->feat_constraint)) {
      hits = slot->data.ptrvalue;
      ValNodeAddPointer (&hits, OBJ_SEQFEAT, report);
      slot->data.ptrvalue = hits;
    }

    slot = slot->next;
  }
}
39613 
39614 
GetFeaturesForSuspectRules(SeqEntryPtr sep,SuspectRuleSetPtr rules,Uint2 featdef)39615 NLM_EXTERN ValNodePtr GetFeaturesForSuspectRules (SeqEntryPtr sep, SuspectRuleSetPtr rules, Uint2 featdef)
39616 {
39617   SuspectRuleCallbackData d;
39618 
39619   MemSet (&d, 0, sizeof (SuspectRuleCallbackData));
39620   d.obj_lists = NULL;
39621   d.rules = rules;
39622   d.featdef = featdef;
39623 
39624   VisitFeaturesInSep (sep, &d, SuspectRuleFeatCallback);
39625   return d.obj_lists;
39626 }
39627 
39628 
FreeListOfObjectLists(ValNodePtr list)39629 NLM_EXTERN ValNodePtr FreeListOfObjectLists (ValNodePtr list)
39630 {
39631   ValNodePtr vnp;
39632 
39633   for (vnp = list; vnp != NULL; vnp = vnp->next) {
39634     vnp->data.ptrvalue = FreeObjectList (vnp->data.ptrvalue);
39635   }
39636   list = ValNodeFree (list);
39637   return list;
39638 }
39639 
39640 
ApplySuspectProductNameFixToString(SuspectRulePtr rule,CharPtr PNTR str)39641 NLM_EXTERN Boolean ApplySuspectProductNameFixToString (SuspectRulePtr rule, CharPtr PNTR str)
39642 {
39643   SimpleReplacePtr simple_replace;
39644   Boolean          rval = FALSE;
39645   Boolean          use_putative = FALSE;
39646   CharPtr          orig;
39647 
39648   if (str == NULL || rule == NULL || rule->replace == NULL || rule->replace->replace_func == NULL) {
39649     return FALSE;
39650   }
39651 
39652   switch (rule->replace->replace_func->choice) {
39653     case ReplaceFunc_simple_replace:
39654       simple_replace = (SimpleReplacePtr) rule->replace->replace_func->data.ptrvalue;
39655       if (simple_replace != NULL) {
39656         if (simple_replace->weasel_to_putative) {
39657           if (SkipWeasel(*str) != *str) {
39658             use_putative = TRUE;
39659           }
39660         }
39661 
39662         if (rule->find == NULL || rule->find->choice != SearchFunc_string_constraint) {
39663           *str = MemFree (*str);
39664           *str = StringSave (simple_replace->replace);
39665           rval = TRUE;
39666         } else if (simple_replace->whole_string && DoesStringMatchConstraint (*str, rule->find->data.ptrvalue)) {
39667           *str = MemFree (*str);
39668           *str = StringSave (simple_replace->replace);
39669           rval = TRUE;
39670         } else {
39671           rval = ReplaceStringConstraintPortionInString (str, simple_replace->replace, rule->find->data.ptrvalue);
39672         }
39673         if (use_putative && StringNCmp (*str, kPutative, StringLen (kPutative)) != 0) {
39674           SetStringValue (str, kPutative, ExistingTextOption_prefix_space);
39675         }
39676       }
39677       break;
39678     case ReplaceFunc_haem_replace:
39679       orig = StringSave (*str);
39680       FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "heme", FALSE, TRUE);
39681       FindReplaceString (str, rule->replace->replace_func->data.ptrvalue, "hem", FALSE, FALSE);
39682       if (StringCmp (orig, *str) != 0) {
39683         rval = TRUE;
39684       }
39685       orig = MemFree (orig);
39686       break;
39687   }
39688   return rval;
39689 }
39690 
39691 
ApplySuspectProductNameFixToFeature(SuspectRulePtr rule,SeqFeatPtr cds,FILE * fp)39692 NLM_EXTERN Boolean ApplySuspectProductNameFixToFeature (SuspectRulePtr rule, SeqFeatPtr cds, FILE *fp)
39693 {
39694   BioseqPtr  protbsp;
39695   SeqFeatPtr protfeat;
39696   SeqMgrFeatContext context;
39697   ProtRefPtr        prp;
39698   CharPtr           new_name, desc;
39699   Boolean           rval = FALSE;
39700   ValNode           vn;
39701 
39702   if (rule == NULL || rule->replace == NULL || cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
39703     return FALSE;
39704   }
39705 
39706   protbsp = BioseqFindFromSeqLoc (cds->product);
39707   protfeat = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &context);
39708   if (protfeat == NULL || protfeat->idx.subtype != FEATDEF_PROT
39709       || (prp = (ProtRefPtr) protfeat->data.value.ptrvalue) == NULL
39710       || prp->name == NULL) {
39711     return FALSE;
39712   }
39713   new_name = StringSave (prp->name->data.ptrvalue);
39714   if (ApplySuspectProductNameFixToString (rule, &new_name)) {
39715     if (fp != NULL) {
39716       fprintf (fp, "Changed '%s' to '%s'", prp->name->data.ptrvalue == NULL ? "" : (CharPtr) prp->name->data.ptrvalue, new_name);
39717     }
39718     if (rule->replace->move_to_note) {
39719       if (SetStringValue (&(cds->comment), prp->name->data.ptrvalue, ExistingTextOption_append_semi)) {
39720         if (fp != NULL) {
39721           fprintf (fp, " and moved original to note");
39722         }
39723       }
39724     }
39725     prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
39726     prp->name->data.ptrvalue = new_name;
39727     if (AdjustmRNAProductToMatchProteinProduct(protfeat)) {
39728       if (fp != NULL) {
39729         fprintf (fp, " and adjusted mRNA");
39730       }
39731     }
39732     if (fp != NULL) {
39733       MemSet (&vn, 0, sizeof (ValNode));
39734       vn.choice = OBJ_SEQFEAT;
39735       vn.data.ptrvalue = cds;
39736       desc = GetDiscrepancyItemText (&vn);
39737       if (desc != NULL) {
39738         fprintf (fp, " for %s", desc);
39739         desc = MemFree (desc);
39740       }
39741       fprintf (fp, "\n");
39742     }
39743     rval = TRUE;
39744   } else {
39745     new_name = MemFree (new_name);
39746   }
39747   return rval;
39748 }
39749 
39750 
TextFromSearchFunc(ValNodePtr s)39751 static CharPtr TextFromSearchFunc (ValNodePtr s)
39752 {
39753   StringConstraintPtr scp;
39754 
39755   CharPtr rval = NULL;
39756 
39757   if (s == NULL) {
39758     return NULL;
39759   }
39760   switch (s->choice) {
39761     case SearchFunc_string_constraint:
39762       scp = (StringConstraintPtr) s->data.ptrvalue;
39763       if (scp != NULL) {
39764         rval = scp->match_text;
39765       }
39766       break;
39767     case SearchFunc_contains_plural:
39768     case SearchFunc_n_or_more_brackets_or_parentheses:
39769     case SearchFunc_three_numbers:
39770     case SearchFunc_all_caps:
39771     case SearchFunc_unbalanced_paren:
39772     case SearchFunc_too_long:
39773       /* no text */
39774       break;
39775     case SearchFunc_underscore:
39776       rval = "_";
39777       break;
39778     case SearchFunc_prefix_and_numbers:
39779     case SearchFunc_has_term:
39780       rval = s->data.ptrvalue;
39781       break;
39782   }
39783   return rval;
39784 }
39785 
39786 
/* Orders two search functions: NULL sorts first, then case-insensitive
 * comparison of their text (see TextFromSearchFunc), with ties broken by
 * the numeric choice value.  Returns <0, 0 or >0.
 */
static int CompareSearchFunc (ValNodePtr s1, ValNodePtr s2)
{
  CharPtr left_txt, right_txt;
  int     order;

  if (s1 == NULL) {
    return (s2 == NULL) ? 0 : -1;
  }
  if (s2 == NULL) {
    return 1;
  }

  left_txt = TextFromSearchFunc (s1);
  right_txt = TextFromSearchFunc (s2);
  order = StringICmp (left_txt, right_txt);
  if (order != 0) {
    return order;
  }

  if (s1->choice < s2->choice) {
    return -1;
  }
  if (s1->choice > s2->choice) {
    return 1;
  }
  return 0;
}
39812 
39813 
/* Orders two rules by their find part (see CompareSearchFunc); a NULL rule
 * sorts before a non-NULL one.
 */
static int CompareSuspectRuleByFind (SuspectRulePtr rule1, SuspectRulePtr rule2)
{
  if (rule1 == NULL) {
    return (rule2 == NULL) ? 0 : -1;
  }
  if (rule2 == NULL) {
    return 1;
  }
  return CompareSearchFunc (rule1->find, rule2->find);
}
39830 
39831 
SortVnpBySuspectRuleFind(VoidPtr ptr1,VoidPtr ptr2)39832 static int LIBCALLBACK SortVnpBySuspectRuleFind (VoidPtr ptr1, VoidPtr ptr2)
39833 {
39834   ValNodePtr  vnp1;
39835   ValNodePtr  vnp2;
39836   int         rval = 0;
39837 
39838   if (ptr1 != NULL && ptr2 != NULL) {
39839     vnp1 = *((ValNodePtr PNTR) ptr1);
39840     vnp2 = *((ValNodePtr PNTR) ptr2);
39841     if (vnp1 == NULL && vnp2 == NULL) {
39842       rval = 0;
39843     } else if (vnp1 == NULL) {
39844       rval = -1;
39845     } else if (vnp2 == NULL) {
39846       rval = 1;
39847     } else {
39848       rval = CompareSuspectRuleByFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
39849     }
39850   }
39851   return rval;
39852 }
39853 
39854 
/* Orders two rules by rule_type first and by find part second (see
 * CompareSearchFunc); a NULL rule sorts before a non-NULL one.
 */
static int CompareSuspectRuleByFixTypeThenFind (SuspectRulePtr rule1, SuspectRulePtr rule2)
{
  if (rule1 == NULL) {
    return (rule2 == NULL) ? 0 : -1;
  }
  if (rule2 == NULL) {
    return 1;
  }

  if (rule1->rule_type != rule2->rule_type) {
    return (rule1->rule_type < rule2->rule_type) ? -1 : 1;
  }
  return CompareSearchFunc (rule1->find, rule2->find);
}
39875 
39876 
SortVnpBySuspectRuleFixTypeThenFind(VoidPtr ptr1,VoidPtr ptr2)39877 static int LIBCALLBACK SortVnpBySuspectRuleFixTypeThenFind (VoidPtr ptr1, VoidPtr ptr2)
39878 {
39879   ValNodePtr  vnp1;
39880   ValNodePtr  vnp2;
39881   int         rval = 0;
39882 
39883   if (ptr1 != NULL && ptr2 != NULL) {
39884     vnp1 = *((ValNodePtr PNTR) ptr1);
39885     vnp2 = *((ValNodePtr PNTR) ptr2);
39886     if (vnp1 == NULL && vnp2 == NULL) {
39887       rval = 0;
39888     } else if (vnp1 == NULL) {
39889       rval = -1;
39890     } else if (vnp2 == NULL) {
39891       rval = 1;
39892     } else {
39893       rval = CompareSuspectRuleByFixTypeThenFind (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
39894     }
39895   }
39896   return rval;
39897 }
39898 
39899 
MakeValNodeListFromSuspectRuleSet(SuspectRuleSetPtr rules)39900 static ValNodePtr MakeValNodeListFromSuspectRuleSet (SuspectRuleSetPtr rules)
39901 {
39902   ValNodeBlock block;
39903   SuspectRulePtr one;
39904 
39905   InitValNodeBlock (&block, NULL);
39906   for (one = rules; one != NULL; one = one->next) {
39907     ValNodeAddPointerToEnd (&block, 0, one);
39908   }
39909   return block.head;
39910 }
39911 
39912 
MakeSuspectRuleSetFromValNodeList(ValNodePtr tmp_list)39913 static SuspectRuleSetPtr MakeSuspectRuleSetFromValNodeList (ValNodePtr tmp_list)
39914 {
39915   ValNodePtr vnp;
39916   SuspectRuleSetPtr first = NULL, last = NULL;
39917 
39918   for (vnp = tmp_list; vnp != NULL; vnp = vnp->next) {
39919     if (last == NULL) {
39920       first = vnp->data.ptrvalue;
39921     } else {
39922       last->next = vnp->data.ptrvalue;
39923     }
39924     last = vnp->data.ptrvalue;
39925     last->next = NULL;
39926   }
39927   return first;
39928 }
39929 
39930 
SortSuspectRuleSetByFind(SuspectRuleSetPtr PNTR rules)39931 NLM_EXTERN void SortSuspectRuleSetByFind (SuspectRuleSetPtr PNTR rules)
39932 {
39933   ValNodePtr   tmp_list;
39934 
39935   if (rules == NULL || *rules == NULL) {
39936     return;
39937   }
39938 
39939   tmp_list = MakeValNodeListFromSuspectRuleSet (*rules);
39940   tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFind);
39941 
39942   *rules = MakeSuspectRuleSetFromValNodeList (tmp_list);
39943   tmp_list = ValNodeFree (tmp_list);
39944 }
39945 
39946 
SortSuspectRuleSetByFixTypeThenFind(SuspectRuleSetPtr PNTR rules)39947 NLM_EXTERN void SortSuspectRuleSetByFixTypeThenFind (SuspectRuleSetPtr PNTR rules)
39948 {
39949   ValNodePtr   tmp_list;
39950 
39951   if (rules == NULL || *rules == NULL) {
39952     return;
39953   }
39954 
39955   tmp_list = MakeValNodeListFromSuspectRuleSet (*rules);
39956   tmp_list = ValNodeSort (tmp_list, SortVnpBySuspectRuleFixTypeThenFind);
39957 
39958   *rules = MakeSuspectRuleSetFromValNodeList (tmp_list);
39959   tmp_list = ValNodeFree (tmp_list);
39960 }
39961 
39962 
PrintSuspectRuleMatches(SeqEntryPtr sep,SuspectRuleSetPtr rules,FILE * fp)39963 NLM_EXTERN void PrintSuspectRuleMatches (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp)
39964 {
39965   ValNodePtr     vnp_l, vnp_o, obj_lists;
39966   SuspectRulePtr rule;
39967   CharPtr        summ;
39968 
39969   if (sep == NULL || rules == NULL || fp == NULL) {
39970     return;
39971   }
39972 
39973   obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT);
39974 
39975   for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) {
39976     if (ValNodeLen (vnp_l->data.ptrvalue) > 0) {
39977       summ = SummarizeSuspectRule (rule);
39978       fprintf (fp, "%s:%d\n", summ, ValNodeLen (vnp_l->data.ptrvalue));
39979       summ = MemFree (summ);
39980       for (vnp_o = vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) {
39981         summ = GetDiscrepancyItemText (vnp_o);
39982         fprintf (fp, "\t%s", summ);
39983         summ = MemFree (summ);
39984       }
39985     }
39986   }
39987 
39988   obj_lists = FreeListOfObjectLists (obj_lists);
39989 }
39990 
39991 
39992 NLM_EXTERN ValNodePtr
GetSuspectRuleDiscrepancies(SeqEntryPtr sep,SuspectRuleSetPtr rules,Uint2 featdef,Uint4 clickable_item_type)39993 GetSuspectRuleDiscrepancies
39994 (SeqEntryPtr       sep,
39995  SuspectRuleSetPtr rules,
39996  Uint2             featdef,
39997  Uint4             clickable_item_type)
39998 {
39999   ValNodePtr     vnp_l, obj_lists, rval = NULL;
40000   SuspectRulePtr rule;
40001   CharPtr        summ;
40002   CharPtr        rna_fmt = "%%d rRNA product names %s";
40003   CharPtr        cds_fmt = "%%d product names %s";
40004   CharPtr        template_fmt;
40005   CharPtr        fmt;
40006 
40007   if (sep == NULL || rules == NULL) {
40008     return NULL;
40009   }
40010 
40011   obj_lists = GetFeaturesForSuspectRules (sep, rules, featdef);
40012   if (featdef == FEATDEF_rRNA) {
40013     template_fmt = rna_fmt;
40014   } else {
40015     template_fmt = cds_fmt;
40016   }
40017 
40018   for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) {
40019     if (ValNodeLen (vnp_l->data.ptrvalue) > 0) {
40020       summ = SummarizeSuspectRule (rule);
40021       fmt = (CharPtr) MemNew (sizeof (Char) * (StringLen (summ) + StringLen (template_fmt)));
40022 
40023       sprintf (fmt, template_fmt, summ);
40024       summ = MemFree (summ);
40025       ValNodeAddPointer (&rval, 0, NewClickableItem (clickable_item_type, fmt, vnp_l->data.ptrvalue));
40026       vnp_l->data.ptrvalue = NULL;
40027       fmt = MemFree (fmt);
40028     }
40029   }
40030 
40031   obj_lists = FreeListOfObjectLists (obj_lists);
40032   return rval;
40033 }
40034 
40035 
ApplySuspectRuleFixesToSeqEntry(SeqEntryPtr sep,SuspectRuleSetPtr rules,FILE * fp)40036 NLM_EXTERN Int4 ApplySuspectRuleFixesToSeqEntry (SeqEntryPtr sep, SuspectRuleSetPtr rules, FILE *fp)
40037 {
40038   ValNodePtr     vnp_l, vnp_o, obj_lists;
40039   SuspectRulePtr rule;
40040   CharPtr      summ;
40041   Int4         num_changed = 0, total_num_changed = 0;
40042   Uint2        entityID;
40043 
40044   if (sep == NULL || rules == NULL) {
40045     return 0;
40046   }
40047 
40048   obj_lists = GetFeaturesForSuspectRules (sep, rules, FEATDEF_PROT);
40049 
40050   for (vnp_l = obj_lists, rule = rules; vnp_l != NULL && rule != NULL; vnp_l = vnp_l->next, rule = rule->next) {
40051     if (rule->replace == NULL || vnp_l->data.ptrvalue == NULL) {
40052       continue;
40053     }
40054     if (fp != NULL) {
40055       summ = SummarizeSuspectRule (rule);
40056       fprintf (fp, "%s:%d identified\n", summ, ValNodeLen (vnp_l->data.ptrvalue));
40057       summ = MemFree (summ);
40058     }
40059     num_changed = 0;
40060     for (vnp_o = vnp_l->data.ptrvalue; vnp_o != NULL; vnp_o = vnp_o->next) {
40061       if (ApplySuspectProductNameFixToFeature (rule, vnp_o->data.ptrvalue, fp)) {
40062         num_changed++;
40063       }
40064     }
40065     if (fp != NULL) {
40066       fprintf (fp, "Num fixed: %d\n", num_changed);
40067     }
40068     total_num_changed += num_changed;
40069   }
40070   entityID = ObjMgrGetEntityIDForChoice(sep);
40071   ObjMgrSetDirtyFlag (entityID, TRUE);
40072   ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
40073 
40074   obj_lists = FreeListOfObjectLists (obj_lists);
40075   return total_num_changed;
40076 }
40077 
40078 
/* Pairs a rule with the position it had in its rule set, so that a list of
 * these can be sorted by rule content (SortVnpByRuleSortRule) and later put
 * back into the original order (SortVnpByRuleSortPos).
 */
typedef struct rulesort {
  SuspectRulePtr rule; /* copy made by RuleSortNew; released by RuleSortFree */
  Int4 pos;            /* position value supplied to RuleSortNew */
} RuleSortData, PNTR RuleSortPtr;
40083 
RuleSortNew(SuspectRulePtr rule,Int4 pos)40084 static RuleSortPtr RuleSortNew (SuspectRulePtr rule, Int4 pos)
40085 {
40086   RuleSortPtr r;
40087 
40088   r = (RuleSortPtr) MemNew (sizeof (RuleSortData));
40089   r->rule = AsnIoMemCopy (rule, (AsnReadFunc)SuspectRuleAsnRead, (AsnWriteFunc) SuspectRuleAsnWrite);
40090   r->pos = pos;
40091   return r;
40092 }
40093 
40094 
/* Releases a RuleSortData together with the rule it holds.
 *
 * r: entry to free; NULL is accepted.
 *
 * Returns the result of MemFree for a non-NULL entry, or NULL when r is NULL,
 * so callers can assign the result back to their pointer.
 */
static RuleSortPtr RuleSortFree (RuleSortPtr r)
{
  if (r == NULL) {
    return NULL;
  }
  r->rule = SuspectRuleFree (r->rule);
  return MemFree (r);
}
40103 
40104 
SortVnpByRuleSortRule(VoidPtr ptr1,VoidPtr ptr2)40105 static int LIBCALLBACK SortVnpByRuleSortRule (VoidPtr ptr1, VoidPtr ptr2)
40106 {
40107   ValNodePtr  vnp1;
40108   ValNodePtr  vnp2;
40109   RuleSortPtr r1, r2;
40110   int         rval = 0;
40111 
40112   if (ptr1 != NULL && ptr2 != NULL) {
40113     vnp1 = *((ValNodePtr PNTR) ptr1);
40114     vnp2 = *((ValNodePtr PNTR) ptr2);
40115     if (vnp1 == NULL && vnp2 == NULL) {
40116       rval = 0;
40117     } else if (vnp1 == NULL) {
40118       rval = -1;
40119     } else if (vnp2 == NULL) {
40120       rval = 1;
40121     } else {
40122       r1 = (RuleSortPtr) vnp1->data.ptrvalue;
40123       r2 = (RuleSortPtr) vnp2->data.ptrvalue;
40124       rval = CompareSuspectRuleByFixTypeThenFind (r1->rule, r2->rule);
40125     }
40126   }
40127   return rval;
40128 }
40129 
40130 
SortVnpByRuleSortPos(VoidPtr ptr1,VoidPtr ptr2)40131 static int LIBCALLBACK SortVnpByRuleSortPos (VoidPtr ptr1, VoidPtr ptr2)
40132 {
40133   ValNodePtr  vnp1;
40134   ValNodePtr  vnp2;
40135   RuleSortPtr r1, r2;
40136   int         rval = 0;
40137 
40138   if (ptr1 != NULL && ptr2 != NULL) {
40139     vnp1 = *((ValNodePtr PNTR) ptr1);
40140     vnp2 = *((ValNodePtr PNTR) ptr2);
40141     if (vnp1 == NULL && vnp2 == NULL) {
40142       rval = 0;
40143     } else if (vnp1 == NULL) {
40144       rval = -1;
40145     } else if (vnp2 == NULL) {
40146       rval = 1;
40147     } else {
40148       r1 = (RuleSortPtr) vnp1->data.ptrvalue;
40149       r2 = (RuleSortPtr) vnp2->data.ptrvalue;
40150       if (r1->pos < r2->pos) {
40151         rval = -1;
40152       } else if (r1->pos > r2->pos) {
40153         rval = 1;
40154       } else {
40155         rval = 0;
40156       }
40157     }
40158   }
40159   return rval;
40160 }
40161 
40162 
SuspectRuleSetToRuleSortList(SuspectRuleSetPtr set)40163 static ValNodePtr SuspectRuleSetToRuleSortList (SuspectRuleSetPtr set)
40164 {
40165   ValNodeBlock   block;
40166   SuspectRulePtr rule;
40167   Int4           pos;
40168 
40169   InitValNodeBlock (&block, NULL);
40170   for (rule = set, pos = 0; rule != NULL; rule = rule->next, pos++) {
40171     ValNodeAddPointerToEnd (&block, 0, RuleSortNew (rule, pos));
40172   }
40173   return block.head;
40174 }
40175 
40176 
RuleSortListToSuspectRuleSet(ValNodePtr list)40177 static SuspectRuleSetPtr RuleSortListToSuspectRuleSet (ValNodePtr list)
40178 {
40179   ValNodePtr vnp;
40180   SuspectRuleSetPtr set = NULL;
40181   SuspectRulePtr last = NULL;
40182   RuleSortPtr r;
40183 
40184   for (vnp = list; vnp != NULL; vnp = vnp->next) {
40185     r = (RuleSortPtr) vnp->data.ptrvalue;
40186     if (r->rule != NULL) {
40187       if (last == NULL) {
40188         set = r->rule;
40189       } else {
40190         last->next = r->rule;
40191       }
40192       last = r->rule;
40193       r->rule = NULL;
40194     }
40195   }
40196   return set;
40197 }
40198 
40199 
/* Finds the rules that are present in only one of two rule sets.
 *
 * set1, set2: rule chains to compare.  The comparison works on entries built
 *             by SuspectRuleSetToRuleSortList, which holds a copy of each
 *             rule plus its position in the chain.
 * in1not2:    receives the chain of copied set1 rules for which no identical
 *             rule was found in set2, in their original set1 order.
 * in2not1:    receives the same for set2 rules with no identical set1 rule.
 *
 * Both output pointers are written unconditionally and are not checked for
 * NULL.  Each match consumes exactly one entry from each list.
 * NOTE(review): the returned chains are made of copies, so presumably the
 * caller is responsible for freeing them - confirm against callers.
 */
NLM_EXTERN void FindDiffsBetweenRuleSets (SuspectRuleSetPtr set1, SuspectRuleSetPtr set2, SuspectRuleSetPtr PNTR in1not2, SuspectRuleSetPtr PNTR in2not1)
{
  ValNodePtr list1, list2;
  ValNodePtr  vnp1, vnp2, cmp_start;
  RuleSortPtr r1, r2;
  Boolean found_match;

  /* sort both lists with the same rule comparator so that candidate matches
   * line up; each entry keeps its original position for re-sorting later
   */
  list1 = SuspectRuleSetToRuleSortList(set1);
  list1 = ValNodeSort(list1, SortVnpByRuleSortRule);
  list2 = SuspectRuleSetToRuleSortList(set2);
  list2 = ValNodeSort(list2, SortVnpByRuleSortRule);

  /* cmp_start is where the scan of list2 resumes for the next list1 entry */
  cmp_start = list2;
  for (vnp1 = list1; vnp1 != NULL; vnp1 = vnp1->next) {
    r1 = (RuleSortPtr) vnp1->data.ptrvalue;
    /* skip list2 nodes that are already matched (choice 1, data freed) or
     * that the comparator places before the current list1 entry; the choice
     * test comes first so the comparator never sees a freed entry
     */
    for (vnp2 = cmp_start; vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) > 0); vnp2 = vnp2->next) {
      cmp_start = vnp2;
    }
    found_match = FALSE;
    /* walk the run of list2 nodes that compare equal (or are already
     * matched) looking for one that AsnIoMemComp reports as the same rule
     */
    while (vnp2 != NULL && (vnp2->choice == 1 || SortVnpByRuleSortRule(&vnp1, &vnp2) == 0) && !found_match) {
      if (vnp2->data.ptrvalue != NULL) {
        r2 = (RuleSortPtr) vnp2->data.ptrvalue;
        if (AsnIoMemComp (r1->rule, r2->rule, (AsnWriteFunc) SuspectRuleAsnWrite)) {
          found_match = TRUE;
        }
      }
      if (!found_match) {
        vnp2 = vnp2->next;
      }
    }
    if (found_match) {
      /* rule is in both sets: free both entries and flag the nodes with
       * choice 1 so they can be extracted below
       */
      vnp1->data.ptrvalue = RuleSortFree(vnp1->data.ptrvalue);
      vnp1->choice = 1;
      vnp2->data.ptrvalue = RuleSortFree(vnp2->data.ptrvalue);
      vnp2->choice = 1;
    }
  }

  /* pull the nodes flagged with choice 1 out of each list and free them */
  vnp1 = ValNodeExtractList (&list1, 1);
  vnp1 = ValNodeFree (vnp1);
  vnp2 = ValNodeExtractList (&list2, 1);
  vnp2 = ValNodeFree (vnp2);

  /* put the unmatched entries back into their original order */
  list1 = ValNodeSort (list1, SortVnpByRuleSortPos);
  list2 = ValNodeSort (list2, SortVnpByRuleSortPos);

  /* the rules are moved out of the entries into the result chains, so the
   * ValNodeFreeData calls below release only the emptied entries and nodes
   */
  *in1not2 = RuleSortListToSuspectRuleSet (list1);
  *in2not1 = RuleSortListToSuspectRuleSet (list2);
  list1 = ValNodeFreeData (list1);
  list2 = ValNodeFreeData (list2);
}
40252 
40253 
ReportRuleSetProblems(CharPtr product_name,SuspectRuleSetPtr rule_list,FILE * output_file,CharPtr prefix)40254 static Boolean ReportRuleSetProblems (CharPtr product_name, SuspectRuleSetPtr rule_list, FILE *output_file, CharPtr prefix)
40255 {
40256   CharPtr summ;
40257   SuspectRulePtr rule;
40258   Boolean any_found = FALSE;
40259 
40260   /* report with rule set */
40261   for (rule = rule_list; rule != NULL; rule = rule->next) {
40262     if (MatchesSuspectProductRule (product_name, rule)) {
40263       summ = SummarizeSuspectRule(rule);
40264       if (output_file == NULL) {
40265         if (prefix != NULL) {
40266           printf ("%s\t", prefix);
40267         }
40268         printf ("%s\t%s\n", product_name, summ);
40269       } else {
40270         if (prefix != NULL) {
40271           fprintf (output_file, "%s\t", prefix);
40272         }
40273         fprintf (output_file, "%s\t%s\n", product_name, summ);
40274       }
40275       summ = MemFree (summ);
40276       any_found = TRUE;
40277     }
40278   }
40279   return any_found;
40280 }
40281 
40282 
FixRuleSetProblems(CharPtr PNTR product_name,SuspectRuleSetPtr rule_list)40283 static Boolean FixRuleSetProblems (CharPtr PNTR product_name, SuspectRuleSetPtr rule_list)
40284 {
40285   SuspectRulePtr rule;
40286   Boolean any_found = FALSE;
40287 
40288   if (product_name == NULL || *product_name == NULL) {
40289     return FALSE;
40290   }
40291   /* report with rule set */
40292   for (rule = rule_list; rule != NULL; rule = rule->next) {
40293     if (rule->replace != NULL && MatchesSuspectProductRule (*product_name, rule)) {
40294       any_found |= ApplySuspectProductNameFixToString (rule, product_name);
40295     }
40296   }
40297   return any_found;
40298 }
40299 
40300 
FindSuspectProductNamesInEntrezGene(FILE * input_file,SuspectRuleSetPtr rule_list,FILE * output_file)40301 NLM_EXTERN Boolean FindSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
40302 {
40303   EntrezgenePtr  egp;
40304   AsnIoPtr       aip;
40305   ValNodePtr     vnp;
40306   ProtRefPtr     prp;
40307   GeneRefPtr     grp;
40308   CharPtr        prefix = NULL;
40309   Char           geneid_buf[20];
40310 
40311   aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL);
40312   egp = EntrezgeneAsnRead (aip, NULL);
40313   if (egp == NULL) {
40314     return FALSE;
40315   }
40316 
40317   /* scan */
40318   if (egp->prot != NULL) {
40319     if (egp->track_info != NULL && egp->track_info->geneid > 0) {
40320       sprintf (geneid_buf, "%d", egp->track_info->geneid);
40321       prefix = geneid_buf;
40322     } else if (egp->gene != NULL) {
40323       grp = (GeneRefPtr) egp->gene;
40324       if (grp->locus_tag == NULL) {
40325         prefix = grp->locus;
40326       } else {
40327         prefix = grp->locus_tag;
40328       }
40329     }
40330     prp = (ProtRefPtr) egp->prot;
40331     for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
40332       if (rule_list == NULL) {
40333         ReportProductNameProblems (vnp->data.ptrvalue, output_file, prefix);
40334       } else {
40335         ReportRuleSetProblems (vnp->data.ptrvalue, rule_list, output_file, prefix);
40336       }
40337     }
40338   }
40339 
40340   egp = EntrezgeneFree (egp);
40341   return TRUE;
40342 }
40343 
40344 
FixSuspectProductNamesInEntrezGene(FILE * input_file,SuspectRuleSetPtr rule_list,FILE * output_file)40345 NLM_EXTERN Boolean FixSuspectProductNamesInEntrezGene (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
40346 {
40347   EntrezgenePtr  egp;
40348   AsnIoPtr       aip;
40349   ValNodePtr     vnp;
40350   ProtRefPtr     prp;
40351   GeneRefPtr     grp;
40352   CharPtr        prefix = NULL;
40353   Char           geneid_buf[20];
40354   CharPtr        product_name;
40355 
40356   aip = AsnIoNew (ASNIO_TEXT_IN, input_file, NULL, NULL, NULL);
40357   egp = EntrezgeneAsnRead (aip, NULL);
40358   if (egp == NULL) {
40359     return FALSE;
40360   }
40361 
40362   /* scan */
40363   if (egp->prot != NULL) {
40364     if (egp->track_info != NULL && egp->track_info->geneid > 0) {
40365       sprintf (geneid_buf, "%d", egp->track_info->geneid);
40366       prefix = geneid_buf;
40367     } else if (egp->gene != NULL) {
40368       grp = (GeneRefPtr) egp->gene;
40369       if (grp->locus_tag == NULL) {
40370         prefix = grp->locus;
40371       } else {
40372         prefix = grp->locus_tag;
40373       }
40374     }
40375     prp = (ProtRefPtr) egp->prot;
40376     for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
40377       product_name = vnp->data.ptrvalue;
40378       if (rule_list == NULL) {
40379         FixProductNameProblems (&product_name);
40380       } else {
40381         FixRuleSetProblems (&product_name, rule_list);
40382       }
40383       fprintf (output_file, "%s\n", product_name);
40384       vnp->data.ptrvalue = product_name;
40385     }
40386   }
40387 
40388   egp = EntrezgeneFree (egp);
40389   return TRUE;
40390 }
40391 
40392 
40393 
FindSuspectProductNamesInNameList(FILE * input_file,SuspectRuleSetPtr rule_list,FILE * output_file)40394 NLM_EXTERN void FindSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
40395 {
40396   ReadBufferData    rbd;
40397   CharPtr           line;
40398 
40399   rbd.fp = input_file;
40400   rbd.current_data = NULL;
40401 
40402   line = AbstractReadFunction (&rbd);
40403   while (line != NULL)
40404   {
40405     if (rule_list == NULL) {
40406       ReportProductNameProblems (line, output_file, NULL);
40407     } else {
40408       ReportRuleSetProblems (line, rule_list, output_file, NULL);
40409     }
40410 
40411     line = MemFree (line);
40412     line = AbstractReadFunction (&rbd);
40413   }
40414 }
40415 
40416 
FixSuspectProductNamesInNameList(FILE * input_file,SuspectRuleSetPtr rule_list,FILE * output_file)40417 NLM_EXTERN void FixSuspectProductNamesInNameList (FILE *input_file, SuspectRuleSetPtr rule_list, FILE *output_file)
40418 {
40419   ReadBufferData    rbd;
40420   CharPtr           line;
40421 
40422   rbd.fp = input_file;
40423   rbd.current_data = NULL;
40424 
40425   line = AbstractReadFunction (&rbd);
40426   while (line != NULL)
40427   {
40428     if (rule_list == NULL) {
40429       FixProductNameProblems (&line);
40430     } else {
40431       FixRuleSetProblems (&line, rule_list);
40432     }
40433     fprintf (output_file, "%s\n", line);
40434 
40435     line = MemFree (line);
40436     line = AbstractReadFunction (&rbd);
40437   }
40438 }
40439 
40440 
/* code for special product table update */

/* One row of a product update table.  All three strings are released by
 * ProductUpdateTableItemFree.
 */
typedef struct productupdatetableitem {
  CharPtr product_match; /* existing product name this row applies to (compared with StringCmp) */
  CharPtr new_name;      /* replacement product name */
  CharPtr note_text;     /* text appended to the coding region comment when the row is applied; may be NULL */
} ProductUpdateTableItemData, PNTR ProductUpdateTableItemPtr;
40447 
40448 
ProductUpdateTableItemNew(CharPtr product_match)40449 static ProductUpdateTableItemPtr ProductUpdateTableItemNew (CharPtr product_match)
40450 {
40451   ProductUpdateTableItemPtr item;
40452 
40453   item = (ProductUpdateTableItemPtr) MemNew (sizeof (ProductUpdateTableItemData));
40454   MemSet (item, 0, sizeof (ProductUpdateTableItemData));
40455   item->product_match = product_match;
40456   return item;
40457 }
40458 
40459 
/* Releases a table row and the three strings it owns.
 *
 * item: row to free; NULL is accepted.
 *
 * Returns the result of MemFree for a non-NULL row, or NULL when item is
 * NULL, so callers can assign the result back to their pointer.
 */
static ProductUpdateTableItemPtr ProductUpdateTableItemFree (ProductUpdateTableItemPtr item)
{
  if (item == NULL) {
    return NULL;
  }
  MemFree (item->product_match);
  MemFree (item->new_name);
  MemFree (item->note_text);
  return MemFree (item);
}
40470 
40471 
/* Writes one table row as a tab-delimited line.
 *
 * fp:   destination stream.
 * item: row to write.
 *
 * Nothing is written when fp or item is NULL or the row has no product name.
 * The line starts with the product name.  If a new name is present, "X" and
 * the new name follow, with the name column left empty when it is
 * "hypothetical protein" (compared ignoring case).  If note text is also
 * present, a second "X" and the note follow, with the note column left empty
 * when it equals the product name.
 */
static void ProductUpdateTableItemWrite (FILE *fp, ProductUpdateTableItemPtr item)
{
  CharPtr name_column;
  CharPtr note_column;

  if (fp == NULL || item == NULL || StringHasNoText (item->product_match)) {
    return;
  }

  fprintf (fp, "%s", item->product_match);

  if (!StringHasNoText (item->new_name)) {
    name_column = item->new_name;
    if (StringICmp (item->new_name, "hypothetical protein") == 0) {
      name_column = "";
    }
    fprintf (fp, "\tX\t%s", name_column);

    if (!StringHasNoText (item->note_text)) {
      note_column = item->note_text;
      if (StringCmp (item->note_text, item->product_match) == 0) {
        note_column = "";
      }
      fprintf (fp, "\tX\t%s", note_column);
    }
  }

  fprintf (fp, "\n");
}
40487 
40488 
ProductUpdateTableFree(ValNodePtr list)40489 NLM_EXTERN ValNodePtr ProductUpdateTableFree (ValNodePtr list)
40490 {
40491   ValNodePtr list_next;
40492 
40493   while (list != NULL) {
40494     list_next = list->next;
40495     list->next = NULL;
40496     list->data.ptrvalue = ProductUpdateTableItemFree (list->data.ptrvalue);
40497     list = ValNodeFree (list);
40498     list = list_next;
40499   }
40500   return list;
40501 }
40502 
40503 
/* Removes one leading and one trailing double quote from a string, in place.
 *
 * str: NUL-terminated string to edit; NULL and empty strings are left alone.
 *
 * A leading quote is removed by shifting the rest of the string one position
 * to the left.  A trailing quote is then removed by truncating the string.
 * The two steps are independent, so a string with only one of the quotes
 * still loses it, and a string consisting of a single quote becomes empty.
 */
static void TrimBeginningAndEndingQuotes (CharPtr str)
{
  CharPtr src, dst;

  if (str == NULL || *str == 0) {
    return;
  }
  if (*str == '"') {
    /* copy onto the start of the string; copying onto str + 1 would leave
     * the leading quote in place
     */
    dst = str;
    src = str + 1;
    while (*src != 0) {
      *dst = *src;
      dst++;
      src++;
    }
    *dst = 0;
  }
  /* only look at the last character if there is one, so that a string that
   * is empty at this point is never read before its start
   */
  dst = str + StringLen(str);
  if (dst > str && *(dst - 1) == '"') {
    *(dst - 1) = 0;
  }
}
40526 
40527 
ProductUpdateTableItemFromValNodeList(ValNodePtr column_list)40528 static ProductUpdateTableItemPtr ProductUpdateTableItemFromValNodeList (ValNodePtr column_list)
40529 {
40530   ProductUpdateTableItemPtr item;
40531   ValNodePtr vnp;
40532 
40533   if (column_list == NULL || StringHasNoText (column_list->data.ptrvalue)
40534       || column_list->next == NULL
40535       || StringICmp (column_list->next->data.ptrvalue, "X") != 0) {
40536     return NULL;
40537   }
40538 
40539   item = ProductUpdateTableItemNew(column_list->data.ptrvalue);
40540   column_list->data.ptrvalue = NULL;
40541   vnp = column_list->next->next;
40542 
40543   /* get new product name.  Default to hypothetical protein if not specified */
40544   if (vnp == NULL || StringHasNoText (vnp->data.ptrvalue)) {
40545     item->new_name = StringSave ("hypothetical protein");
40546   } else {
40547     item->new_name = vnp->data.ptrvalue;
40548     vnp->data.ptrvalue = NULL;
40549   }
40550   if (vnp != NULL) {
40551     vnp = vnp->next;
40552   }
40553 
40554   /* find out if note is required */
40555   if (vnp != NULL && StringCmp (vnp->data.ptrvalue, "X") == 0) {
40556     if (vnp->next == NULL || StringHasNoText (vnp->next->data.ptrvalue)) {
40557       item->note_text = StringSave (item->product_match);
40558     } else {
40559       item->note_text = vnp->next->data.ptrvalue;
40560       vnp->next->data.ptrvalue = NULL;
40561     }
40562   }
40563   return item;
40564 }
40565 
40566 
ReadProductUpdateTable(FILE * fp)40567 NLM_EXTERN ValNodePtr ReadProductUpdateTable (FILE *fp)
40568 {
40569   ReadBufferData rbd;
40570   CharPtr        line;
40571   ValNodeBlock   line_list;
40572   ValNodePtr     column_list;
40573   ProductUpdateTableItemPtr item;
40574 
40575   if (fp == NULL) return NULL;
40576   rbd.fp = fp;
40577   rbd.current_data = NULL;
40578 
40579   InitValNodeBlock (&line_list, NULL);
40580 
40581   line = AbstractReadFunction (&rbd);
40582   while (line != NULL)
40583   {
40584     column_list = ReadOneColumnList (line);
40585     if (column_list != NULL) {
40586       TrimBeginningAndEndingQuotes(column_list->data.ptrvalue);
40587       item = ProductUpdateTableItemFromValNodeList(column_list);
40588       if (item != NULL) {
40589         ValNodeAddPointerToEnd (&line_list, 0, item);
40590       }
40591       column_list = ValNodeFreeData (column_list);
40592     }
40593     line = AbstractReadFunction (&rbd);
40594   }
40595   return line_list.head;
40596 }
40597 
40598 
/* Writes every usable row of a product update table.
 *
 * fp:    destination stream, passed to ProductUpdateTableItemWrite.
 * table: ValNode list whose data pointers are ProductUpdateTableItemPtr.
 *
 * Nodes without a row, and rows without a product name, are skipped.
 */
static void WriteProductUpdateTable (FILE *fp, ValNodePtr table)
{
  ValNodePtr node;
  ProductUpdateTableItemPtr row;

  for (node = table; node != NULL; node = node->next) {
    row = (ProductUpdateTableItemPtr) node->data.ptrvalue;
    if (row == NULL || StringHasNoText (row->product_match)) {
      continue;
    }
    ProductUpdateTableItemWrite(fp, row);
  }
}
40611 
40612 
GetProductUpdateTableItemForProduct(CharPtr product,ValNodePtr list)40613 static ProductUpdateTableItemPtr GetProductUpdateTableItemForProduct (CharPtr product, ValNodePtr list)
40614 {
40615   ProductUpdateTableItemPtr item;
40616   ValNodePtr vnp;
40617 
40618   if (StringHasNoText (product) || list == NULL) {
40619     return NULL;
40620   }
40621 
40622   for (vnp = list; vnp != NULL; vnp = vnp->next) {
40623     if ((item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue) != NULL
40624       && StringCmp (product, item->product_match) == 0) {
40625       return item;
40626     }
40627   }
40628   return NULL;
40629 }
40630 
40631 
/* State handed to ApplyProductUpdateCallback through the feature visitor. */
typedef struct productupdate {
  ValNodePtr table;   /* product update table searched for each product name */
  FILE *log_fp;       /* optional log stream; no log lines are written when NULL */
  Boolean any_change; /* set to TRUE by the callback when it applies a row */
} ProductUpdateData, PNTR ProductUpdatePtr;
40637 
/* Feature visitor that applies a product update table to one coding region.
 *
 * sfp:  feature being visited; anything but a coding region is ignored.
 * data: ProductUpdatePtr with the table, the optional log stream and the
 *       any_change flag.
 *
 * The protein reference is taken from the first protein feature on the
 * product sequence, or from GetProtRefForFeature when that yields nothing.
 * If the first product name has a row in the table:
 *   - the name is replaced by the row's new name;
 *   - the row's note text, if any, is appended to the coding region comment;
 *   - when the new name is "hypothetical protein" and EC numbers exist, they
 *     are appended to the comment as well;
 *   - AdjustmRNAProductToMatchProteinProduct is called for the protein
 *     feature;
 *   - any_change is set and, if a log stream was given, a tab-delimited line
 *     (sequence id, old name, new name, note, mRNA status) is written.
 *
 * The product sequence may not have been found even though a protein
 * reference was obtained; in that case the id column of the log line is
 * left empty.
 */
static void ApplyProductUpdateCallback (SeqFeatPtr sfp, Pointer data)
{
  ProductUpdatePtr pd;
  BioseqPtr        pbsp;
  SeqFeatPtr       prot;
  ProtRefPtr       prp = NULL;
  SeqMgrFeatContext context;
  ProductUpdateTableItemPtr item = NULL;
  Char                      buf[255];
  ValNodePtr                vnp;
  Boolean                   adjusted_mrna;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION
      || (pd = (ProductUpdatePtr) data) == NULL) {
    return;
  }

  pbsp = BioseqFindFromSeqLoc (sfp->product);
  prot = SeqMgrGetNextFeature (pbsp, NULL, 0, FEATDEF_PROT, &context);
  if (prot == NULL || (prp = (ProtRefPtr) prot->data.value.ptrvalue) == NULL) {
    prp = GetProtRefForFeature(sfp);
  }
  if (prp != NULL && prp->name != NULL) {
    item = GetProductUpdateTableItemForProduct (prp->name->data.ptrvalue, pd->table);

    if (item != NULL) {
      prp->name->data.ptrvalue = MemFree (prp->name->data.ptrvalue);
      prp->name->data.ptrvalue = StringSave (item->new_name);
      if (item->note_text != NULL) {
        SetStringValue (&(sfp->comment), item->note_text, ExistingTextOption_append_semi);
      }
      /* also need to move ec numbers to note, if any, for hypothetical protein */
      if (StringICmp (item->new_name, "hypothetical protein") == 0
          && prp->ec != NULL) {
        SetStringValue (&(sfp->comment), " EC_number=", ExistingTextOption_append_semi);
        SetStringValue (&(sfp->comment), prp->ec->data.ptrvalue, ExistingTextOption_append_none);
        for (vnp = prp->ec->next; vnp != NULL; vnp = vnp->next) {
          SetStringValue (&(sfp->comment), vnp->data.ptrvalue, ExistingTextOption_append_comma);
        }
      }

      adjusted_mrna = AdjustmRNAProductToMatchProteinProduct(prot);

      pd->any_change = TRUE;
      if (pd->log_fp != NULL) {
        /* pbsp is NULL when the product sequence was not found; do not
         * dereference it, just log an empty id
         */
        buf[0] = 0;
        if (pbsp != NULL) {
          SeqIdWrite (SeqIdFindBest (pbsp->id, SEQID_GENBANK), buf, PRINTID_REPORT, sizeof (buf) - 1);
        }
        fprintf (pd->log_fp, "%s\t%s\t%s\t%s\t%s\n", buf, item->product_match, item->new_name,
                 item->note_text == NULL ? "" : item->note_text,
                 adjusted_mrna ? "Adjusted mRNA" : "");
      }
    }
  }
}
40691 
40692 
ApplyProductUpdateTable(ValNodePtr table,SeqEntryPtr sep,FILE * log_fp)40693 NLM_EXTERN Boolean ApplyProductUpdateTable (ValNodePtr table, SeqEntryPtr sep, FILE *log_fp)
40694 {
40695   ProductUpdateData pd;
40696 
40697   if (table == NULL || sep == NULL) {
40698     return FALSE;
40699   }
40700 
40701   MemSet (&pd, 0, sizeof (ProductUpdateData));
40702   pd.table = table;
40703   pd.log_fp = log_fp;
40704 
40705   VisitFeaturesInSep (sep, &pd, ApplyProductUpdateCallback);
40706   return pd.any_change;
40707 }
40708 
40709 
/* Feature visitor that collects product names.
 *
 * sfp:  feature being visited.
 * data: ValNodeBlockPtr that receives a copy of the name.
 *
 * For a protein feature the protein reference is the feature's own data; for
 * a coding region it comes from GetProtRefForFeature.  Other feature types
 * are ignored.  The first name of the protein reference is copied to the end
 * of the block when it has text.
 */
static void ExportProductUpdateTableCallback (SeqFeatPtr sfp, Pointer data)
{
  ProtRefPtr prp = NULL;

  if (sfp == NULL || data == NULL) {
    return;
  }

  if (sfp->data.choice == SEQFEAT_PROT) {
    prp = (ProtRefPtr)sfp->data.value.ptrvalue;
  } else if (sfp->data.choice == SEQFEAT_CDREGION) {
    prp = GetProtRefForFeature(sfp);
  }

  if (prp == NULL || prp->name == NULL) {
    return;
  }
  if (StringHasNoText (prp->name->data.ptrvalue)) {
    return;
  }
  ValNodeAddPointerToEnd ((ValNodeBlockPtr) data, 0, StringSave (prp->name->data.ptrvalue));
}
40729 
40730 
ExportProductUpdateTable(SeqEntryPtr sep,FILE * fp)40731 NLM_EXTERN void ExportProductUpdateTable (SeqEntryPtr sep, FILE *fp)
40732 {
40733   ValNodeBlock block;
40734   ValNodePtr vnp;
40735 
40736   if (sep == NULL || fp == NULL) {
40737     return;
40738   }
40739   InitValNodeBlock (&block, NULL);
40740 
40741   VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback);
40742 
40743   block.head = ValNodeSort (block.head, SortVnpByString);
40744   ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData);
40745 
40746   for (vnp = block.head; vnp != NULL; vnp = vnp->next) {
40747     fprintf (fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
40748   }
40749   block.head = ValNodeFreeData (block.head);
40750 }
40751 
40752 
ApplySuspectProductNameFixToProductUpdateTableItem(SuspectRulePtr rule,ProductUpdateTableItemPtr item)40753 static Boolean ApplySuspectProductNameFixToProductUpdateTableItem (SuspectRulePtr rule, ProductUpdateTableItemPtr item)
40754 {
40755   CharPtr           new_name;
40756   Boolean           rval = FALSE;
40757 
40758   if (rule == NULL || rule->replace == NULL || item == NULL || StringHasNoText (item->product_match)) {
40759     return FALSE;
40760   }
40761 
40762   if (item->new_name == NULL) {
40763     new_name = StringSave (item->product_match);
40764   } else {
40765     new_name = StringSave (item->new_name);
40766   }
40767   if (ApplySuspectProductNameFixToString (rule, &new_name)) {
40768     item->new_name = MemFree (item->new_name);
40769     item->note_text = MemFree (item->note_text);
40770     item->new_name = new_name;
40771     if (rule->replace->move_to_note) {
40772       item->note_text = StringSave (item->product_match);
40773     }
40774     rval = TRUE;
40775   } else {
40776     new_name = MemFree (new_name);
40777   }
40778   return rval;
40779 }
40780 
40781 
ApplySuspectProductNameFixesToProductUpdateTable(SuspectRuleSetPtr rule_set,ValNodePtr table)40782 static Boolean ApplySuspectProductNameFixesToProductUpdateTable (SuspectRuleSetPtr rule_set, ValNodePtr table)
40783 {
40784   SuspectRulePtr rule;
40785   ValNodePtr     vnp;
40786   Boolean        rval = FALSE, this_rule_apply, this_rule_match;
40787   ProductUpdateTableItemPtr item;
40788 
40789   if (rule_set == NULL || table == NULL) {
40790     return FALSE;
40791   }
40792 
40793   for (vnp = table; vnp != NULL; vnp = vnp->next) {
40794     this_rule_apply = FALSE;
40795     this_rule_match = FALSE;
40796     item = (ProductUpdateTableItemPtr) vnp->data.ptrvalue;
40797     for (rule = rule_set; rule != NULL; rule = rule->next) {
40798       if (ApplySuspectProductNameFixToProductUpdateTableItem (rule, item)) {
40799         this_rule_apply = TRUE;
40800       } else if (!this_rule_apply && !this_rule_match) {
40801         this_rule_match = MatchesSuspectProductRule (item->product_match, rule);
40802       }
40803     }
40804     if (!this_rule_apply && this_rule_match) {
40805       item->new_name = StringSave ("hypothetical protein");
40806       item->note_text = StringSave (item->product_match);
40807     }
40808   }
40809   return rval;
40810 }
40811 
40812 
ExportProductUpdateTableWithPrecomputedSuggestions(FILE * fp,SeqEntryPtr sep,SuspectRuleSetPtr rules)40813 NLM_EXTERN void ExportProductUpdateTableWithPrecomputedSuggestions (FILE *fp, SeqEntryPtr sep, SuspectRuleSetPtr rules)
40814 {
40815   ValNodeBlock block;
40816   ValNodePtr vnp;
40817   ProductUpdateTableItemPtr item;
40818 
40819   if (sep == NULL || fp == NULL) {
40820     return;
40821   }
40822   InitValNodeBlock (&block, NULL);
40823 
40824   VisitFeaturesInSep (sep, &block, ExportProductUpdateTableCallback);
40825 
40826   block.head = ValNodeSort (block.head, SortVnpByString);
40827   ValNodeUnique (&(block.head), SortVnpByString, ValNodeFreeData);
40828 
40829   for (vnp = block.head; vnp != NULL; vnp = vnp->next) {
40830     item = ProductUpdateTableItemNew(vnp->data.ptrvalue);
40831     vnp->data.ptrvalue = item;
40832   }
40833 
40834   ApplySuspectProductNameFixesToProductUpdateTable (rules, block.head);
40835 
40836   WriteProductUpdateTable (fp, block.head);
40837 
40838   block.head = ProductUpdateTableFree (block.head);
40839 }
40840 
40841 
FindBioseqInValNodeList(BioseqPtr bsp,ValNodePtr list)40842 static Int4 FindBioseqInValNodeList (BioseqPtr bsp, ValNodePtr list)
40843 {
40844   Int4 pos = 0;
40845   ValNodePtr vnp;
40846 
40847   vnp = list;
40848   while (vnp != NULL && bsp != vnp->data.ptrvalue) {
40849     pos++;
40850     vnp = vnp->next;
40851   }
40852   if (vnp == NULL) {
40853     return -1;
40854   } else {
40855     return pos;
40856   }
40857 }
40858 
40859 
40860 /* for update sequence matching */
40861 /* note - must set scope to original before calling */
ShuffleUpdateBioseqListWithIndex(ValNodePtr PNTR update_bioseq_list,ValNodePtr orig_bioseq_list)40862 NLM_EXTERN ValNodePtr ShuffleUpdateBioseqListWithIndex (ValNodePtr PNTR update_bioseq_list, ValNodePtr orig_bioseq_list)
40863 {
40864   ValNodePtr     unmatched_list = NULL;
40865   Int4           update_pos;
40866   BioseqPtr      orig_bsp, update_bsp;
40867   BioseqSearchIndexPtr index;
40868   ValNodePtr     unmatched_vnp, unmatched_next, unmatched_prev = NULL, update_prev = NULL;
40869   SeqIdPtr       sip;
40870   ObjectIdPtr    oip;
40871   BioseqPtr PNTR update_vector;
40872   Int4           len;
40873 
40874   if (update_bioseq_list == NULL || *update_bioseq_list == NULL)
40875   {
40876     return NULL;
40877   }
40878   else if (orig_bioseq_list == NULL)
40879   {
40880     unmatched_list = *update_bioseq_list;
40881     *update_bioseq_list = NULL;
40882     return unmatched_list;
40883   }
40884 
40885   len = ValNodeLen (orig_bioseq_list);
40886   update_vector = (BioseqPtr PNTR) MemNew (sizeof (BioseqPtr) * len);
40887   MemSet (update_vector, 0, sizeof (BioseqPtr) * len);
40888 
40889   index = BuildIDStringsListForBioseqList (orig_bioseq_list);
40890   /* for each update sequence, identifies original Bioseq and if found, removes from list */
40891   for (unmatched_vnp = *update_bioseq_list; unmatched_vnp != NULL; unmatched_vnp = unmatched_next) {
40892     unmatched_next = unmatched_vnp->next;
40893     update_bsp = unmatched_vnp->data.ptrvalue;
40894     orig_bsp = NULL;
40895     if (update_bsp != NULL) {
40896       for (sip = update_bsp->id; sip != NULL && orig_bsp == NULL; sip = sip->next) {
40897         if (sip->choice == SEQID_LOCAL && (oip = (ObjectIdPtr)sip->data.ptrvalue) != NULL && oip->str != NULL) {
40898           orig_bsp = FindStringInIdListIndex (oip->str, index);
40899         } else {
40900           orig_bsp = BioseqFind (sip);
40901         }
40902       }
40903     }
40904     if (orig_bsp != NULL && (update_pos = FindBioseqInValNodeList (orig_bsp, orig_bioseq_list)) > -1) {
40905       update_vector[update_pos] = update_bsp;
40906       if (unmatched_prev == NULL) {
40907         *update_bioseq_list = unmatched_vnp->next;
40908       } else {
40909         unmatched_prev->next = unmatched_vnp->next;
40910       }
40911       unmatched_vnp->next = NULL;
40912       unmatched_vnp = ValNodeFree (unmatched_vnp);
40913     } else {
40914       unmatched_prev = unmatched_vnp;
40915     }
40916   }
40917   index = BioseqSearchIndexFree (index);
40918 
40919   unmatched_list = *update_bioseq_list;
40920   *update_bioseq_list = NULL;
40921   *update_bioseq_list = ValNodeNew (NULL);
40922   (*update_bioseq_list)->data.ptrvalue = update_vector[0];
40923   update_prev = (*update_bioseq_list);
40924   for (update_pos = 1; update_pos < len; update_pos++) {
40925     update_prev = ValNodeNew (update_prev);
40926     update_prev->data.ptrvalue = update_vector[update_pos];
40927     update_prev->choice = update_pos;
40928   }
40929 
40930   return unmatched_list;
40931 }
40932 
40933 
SortVnpByInt(VoidPtr ptr1,VoidPtr ptr2)40934 static int LIBCALLBACK SortVnpByInt (VoidPtr ptr1, VoidPtr ptr2)
40935 
40936 {
40937   ValNodePtr  vnp1;
40938   ValNodePtr  vnp2;
40939 
40940   if (ptr1 == NULL || ptr2 == NULL) return 0;
40941   vnp1 = *((ValNodePtr PNTR) ptr1);
40942   vnp2 = *((ValNodePtr PNTR) ptr2);
40943   if (vnp1 == NULL || vnp2 == NULL) return 0;
40944 
40945   if (vnp1->data.intvalue > vnp2->data.intvalue) {
40946     return 1;
40947   } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
40948     return -1;
40949   }
40950 
40951   return 0;
40952 }
40953 
40954 
/* Folds the textual content of a gene feature into *note.
 *
 * Every non-blank value among the gene reference's locus, allele, desc,
 * maploc and locus_tag, then the gene's comment, then each GenBank
 * qualifier value, is passed to SetStringValue with
 * ExistingTextOption_prefix_semi.
 *
 * Does nothing when gene or note is NULL or when gene is not a gene feature.
 * A gene feature without a gene reference contributes only its comment and
 * qualifier values.
 */
static void AddGeneQualifiersToNote (SeqFeatPtr gene, CharPtr PNTR note)
{
  GeneRefPtr grp;
  GBQualPtr gbq;

  if (gene == NULL || note == NULL || gene->data.choice != SEQFEAT_GENE) {
    return;
  }

  grp = (GeneRefPtr) gene->data.value.ptrvalue;
  /* the gene reference may be absent; the other helpers in this file check
   * for that as well before reading its fields */
  if (grp != NULL) {
    if (!StringHasNoText(grp->locus)) {
      SetStringValue(note, grp->locus, ExistingTextOption_prefix_semi);
    }
    if (!StringHasNoText(grp->allele)) {
      SetStringValue(note, grp->allele, ExistingTextOption_prefix_semi);
    }
    if (!StringHasNoText(grp->desc)) {
      SetStringValue(note, grp->desc, ExistingTextOption_prefix_semi);
    }
    if (!StringHasNoText(grp->maploc)) {
      SetStringValue(note, grp->maploc, ExistingTextOption_prefix_semi);
    }
    if (!StringHasNoText(grp->locus_tag)) {
      SetStringValue(note, grp->locus_tag, ExistingTextOption_prefix_semi);
    }
  }

  if (!StringHasNoText(gene->comment)) {
    SetStringValue(note, gene->comment, ExistingTextOption_prefix_semi);
  }
  for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) {
    if (!StringHasNoText (gbq->val)) {
      SetStringValue(note, gbq->val, ExistingTextOption_prefix_semi);
    }
  }

}
40990 
40991 
/* Writes "<desc> CDS at <location> converted to misc_feature" to the log
 * (no trailing newline; the caller finishes the line) and flags the log as
 * containing data.
 *
 * desc is chosen in this order: the gene's locus_tag, the gene's locus, the
 * first protein name in prp, and finally the literal "Unknown".  All of
 * these are borrowed strings; nothing here needs to be freed except the
 * printed location.
 *
 * Does nothing when lip, lip->fp or sfp is NULL.
 */
static void LogCDSConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene, ProtRefPtr prp)
{
  GeneRefPtr grp;
  CharPtr    desc = NULL;
  CharPtr    loc;

  if (lip == NULL || lip->fp == NULL || sfp == NULL) {
    return;
  }
  if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) {
    if (!StringHasNoText (grp->locus_tag)) {
      desc = grp->locus_tag;
    } else if (!StringHasNoText (grp->locus)) {
      desc = grp->locus;
    }
  }
  if (desc == NULL && prp != NULL) {
    if (prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
      desc = prp->name->data.ptrvalue;
    }
  }
  if (desc == NULL) {
    desc = "Unknown";
  }
  loc = SeqLocPrint (sfp->location);
  /* never hand a NULL pointer to %s, in case no location text was produced */
  fprintf (lip->fp, "%s CDS at %s converted to misc_feature", desc,
           loc == NULL ? "unknown location" : loc);
  loc = MemFree (loc);
  lip->data_in_log = TRUE;
}
41021 
41022 
/* Writes "<desc> rRNA at <location> converted to misc_feature" to the log
 * (no trailing newline; the caller finishes the line) and flags the log as
 * containing data.
 *
 * desc is chosen in this order: the gene's locus_tag, the gene's locus, the
 * string returned by GetRNAProductString for sfp, and finally the literal
 * "unknown".  The gene strings are borrowed; only the product string and the
 * printed location are owned here and released before returning.
 *
 * Does nothing when lip, lip->fp or sfp is NULL.
 */
static void LogrRNAConversion (LogInfoPtr lip, SeqFeatPtr sfp, SeqFeatPtr gene)
{
  GeneRefPtr grp;
  CharPtr    desc = NULL, loc;
  CharPtr    rna_product = NULL;  /* owned copy, only set when it is needed */

  if (lip == NULL || lip->fp == NULL || sfp == NULL) {
    return;
  }
  if (gene != NULL && (grp = gene->data.value.ptrvalue) != NULL) {
    if (!StringHasNoText (grp->locus_tag)) {
      desc = grp->locus_tag;
    } else if (!StringHasNoText (grp->locus)) {
      desc = grp->locus;
    }
  }
  if (desc == NULL) {
    rna_product = GetRNAProductString(sfp, NULL);
    desc = rna_product;
  }
  if (desc == NULL) {
    desc = "unknown";
  }
  loc = SeqLocPrint (sfp->location);
  /* never hand a NULL pointer to %s, in case no location text was produced */
  fprintf (lip->fp, "%s rRNA at %s converted to misc_feature", desc,
           loc == NULL ? "unknown location" : loc);
  loc = MemFree (loc);
  rna_product = MemFree (rna_product);
  lip->data_in_log = TRUE;
}
41050 
41051 
41052 /* for cleaning up bad features identified by validator or asndisc */
ConvertListToMiscFeat(ValNodePtr list,Boolean remove_gene,LogInfoPtr lip)41053 NLM_EXTERN void ConvertListToMiscFeat (ValNodePtr list, Boolean remove_gene, LogInfoPtr lip)
41054 {
41055   ValNodePtr vnp, other_list;
41056   SeqFeatPtr sfp, gene;
41057   ProtRefPtr prp;
41058   BioseqPtr  pbsp;
41059   ImpFeatPtr ifp;
41060   CharPtr    rna_name;
41061   ValNodePtr entityIDList = NULL;
41062   SeqEntryPtr sep;
41063   Boolean     converted;
41064 
41065   for (vnp = list; vnp != NULL; vnp = vnp->next) {
41066     sfp = (SeqFeatPtr) vnp->data.ptrvalue;
41067     gene = GetGeneForFeature(sfp);
41068     converted = FALSE;
41069     if (sfp->data.choice == SEQFEAT_CDREGION) {
41070       prp = GetProtRefForFeature (sfp);
41071       LogCDSConversion(lip, sfp, gene, prp);
41072       if (prp != NULL && prp->name != NULL && !StringHasNoText (prp->name->data.ptrvalue)) {
41073         SetStringValue(&(sfp->comment), prp->name->data.ptrvalue, ExistingTextOption_prefix_semi);
41074       }
41075       pbsp = BioseqFindFromSeqLoc (sfp->product);
41076       if (pbsp != NULL) {
41077         pbsp->idx.deleteme = TRUE;
41078       }
41079       sfp->data.value.ptrvalue = CdRegionFree (sfp->data.value.ptrvalue);
41080       sfp->data.choice = SEQFEAT_IMP;
41081       ifp = ImpFeatNew();
41082       ifp->key = StringSave("misc_feature");
41083       sfp->data.value.ptrvalue = ifp;
41084       sfp->product = SeqLocFree (sfp->product);
41085       sfp->idx.subtype = 0;
41086       ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID);
41087       converted = TRUE;
41088     } else if (sfp->data.choice == SEQFEAT_RNA) {
41089       LogrRNAConversion(lip, sfp, gene);
41090       rna_name = GetRNAProductString(sfp, NULL);
41091       SetStringValue(&(sfp->comment), rna_name, ExistingTextOption_prefix_semi);
41092       sfp->data.value.ptrvalue = RnaRefFree (sfp->data.value.ptrvalue);
41093       sfp->data.choice = SEQFEAT_IMP;
41094       ifp = ImpFeatNew();
41095       ifp->key = StringSave("misc_feature");
41096       sfp->data.value.ptrvalue = ifp;
41097       sfp->idx.subtype = 0;
41098       ValNodeAddInt (&entityIDList, 0, sfp->idx.entityID);
41099       converted = TRUE;
41100     }
41101     if (converted && remove_gene && gene != NULL) {
41102       other_list = GetFeaturesForGene(gene, 0);
41103       if (ValNodeLen (other_list) < 2) {
41104         AddGeneQualifiersToNote(gene, &(sfp->comment));
41105         gene->idx.deleteme = TRUE;
41106         if (lip != NULL && lip->fp != NULL) {
41107           fprintf (lip->fp, ", gene deleted");
41108         }
41109       }
41110       other_list = ValNodeFree (other_list);
41111     }
41112     if (converted && lip != NULL && lip->fp != NULL) {
41113       fprintf (lip->fp, "\n");
41114     }
41115   }
41116   entityIDList = ValNodeSort (entityIDList, SortVnpByInt);
41117   ValNodeUnique (&entityIDList, SortVnpByInt, ValNodeFree);
41118   for (vnp = entityIDList; vnp != NULL; vnp = vnp->next) {
41119     /* remove any protein sequences or genes that were marked for deletion */
41120     DeleteMarkedObjects (vnp->data.intvalue, 0, NULL);
41121     sep = GetTopSeqEntryForEntityID(vnp->data.intvalue);
41122     RenormalizeNucProtSets (sep, TRUE);
41123     SeqMgrIndexFeatures (vnp->data.intvalue, NULL);
41124   }
41125   entityIDList = ValNodeFree (entityIDList);
41126 }
41127 
41128 
41129 
41130