1 /*   asn2gnb6.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2gnb6.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 *          Mati Shomrat
30 *
31 * Version Creation Date:   10/21/98
32 *
33 * $Revision: 1.381 $
34 *
35 * File Description:  New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41 
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59 #include <findrepl.h>
60 #include <valid.h>
61 
62 #ifdef WIN_MAC
63 #if __profile__
64 #include <Profiler.h>
65 #endif
66 #endif
67 
68 static CharPtr link_tax = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
69 
70 static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
71 static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
72 
73 static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
74 static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
75 
76 static CharPtr link_lat_lon = "https://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html?";
77 
78 static CharPtr link_gold_stamp_id = "http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp=";
79 
80 static CharPtr link_annot_soft_ver = "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version";
81 
82 static CharPtr link_annot_ver = "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/";
83 
84 /* ordering arrays for qualifiers and note components */
85 
86 static SourceType source_qual_order [] = {
87   SCQUAL_organism,
88 
89   SCQUAL_organelle,
90 
91   SCQUAL_mol_type,
92 
93   SCQUAL_strain,
94   SCQUAL_sub_strain,
95   SCQUAL_variety,
96   SCQUAL_serotype,
97   SCQUAL_serovar,
98   SCQUAL_cultivar,
99   SCQUAL_isolate,
100   SCQUAL_isolation_source,
101   SCQUAL_spec_or_nat_host,
102   SCQUAL_sub_species,
103 
104   SCQUAL_specimen_voucher,
105   SCQUAL_culture_collection,
106   SCQUAL_bio_material,
107 
108   SCQUAL_type_material,
109 
110   SCQUAL_db_xref,
111   SCQUAL_org_xref,
112 
113   SCQUAL_chromosome,
114 
115   SCQUAL_segment,
116 
117   SCQUAL_map,
118   SCQUAL_clone,
119   SCQUAL_sub_clone,
120   SCQUAL_haplotype,
121   SCQUAL_haplogroup,
122   SCQUAL_sex,
123   SCQUAL_mating_type,
124   SCQUAL_cell_line,
125   SCQUAL_cell_type,
126   SCQUAL_tissue_type,
127   SCQUAL_clone_lib,
128   SCQUAL_dev_stage,
129   SCQUAL_ecotype,
130 
131   SCQUAL_germline,
132   SCQUAL_rearranged,
133   SCQUAL_transgenic,
134   SCQUAL_environmental_sample,
135 
136   SCQUAL_lab_host,
137   SCQUAL_pop_variant,
138   SCQUAL_tissue_lib,
139 
140   SCQUAL_plasmid_name,
141   SCQUAL_transposon_name,
142   SCQUAL_ins_seq_name,
143 
144   SCQUAL_country,
145 
146   SCQUAL_focus,
147 
148   SCQUAL_lat_lon,
149   SCQUAL_altitude,
150   SCQUAL_collection_date,
151   SCQUAL_collected_by,
152   SCQUAL_identified_by,
153   /*
154   SCQUAL_fwd_primer_seq,
155   SCQUAL_rev_primer_seq,
156   SCQUAL_fwd_primer_name,
157   SCQUAL_rev_primer_name,
158   */
159   SCQUAL_PCR_primers,
160   SCQUAL_PCR_reaction,
161 
162   SCQUAL_note,
163 
164   SCQUAL_sequenced_mol,
165   SCQUAL_label,
166   SCQUAL_usedin,
167   SCQUAL_citation,
168   (SourceType) 0
169 };
170 
171 static SourceType source_desc_note_order [] = {
172   SCQUAL_seqfeat_note,
173   SCQUAL_orgmod_note,
174   SCQUAL_subsource_note,
175 
176   SCQUAL_metagenomic,
177 
178   SCQUAL_linkage_group,
179 
180   SCQUAL_type,
181   SCQUAL_sub_type,
182   SCQUAL_serogroup,
183   SCQUAL_pathovar,
184   SCQUAL_chemovar,
185   SCQUAL_biovar,
186   SCQUAL_biotype,
187   SCQUAL_group,
188   SCQUAL_sub_group,
189   SCQUAL_common,
190   SCQUAL_acronym,
191   SCQUAL_dosage,
192 
193   SCQUAL_authority,
194   SCQUAL_forma,
195   SCQUAL_forma_specialis,
196   SCQUAL_synonym,
197   SCQUAL_anamorph,
198   SCQUAL_teleomorph,
199   SCQUAL_breed,
200   SCQUAL_frequency,
201 
202   SCQUAL_metagenome_source,
203   SCQUAL_metagenome_note,
204 
205   SCQUAL_genotype,
206   SCQUAL_plastid_name,
207 
208   SCQUAL_endogenous_virus_name,
209 
210   SCQUAL_common_name,
211 
212   SCQUAL_PCR_primer_note,
213   SCQUAL_PCR_reaction,
214 
215   SCQUAL_zero_orgmod,
216   SCQUAL_one_orgmod,
217   SCQUAL_zero_subsrc,
218 
219   /* SCQUAL_old_lineage, */
220 
221   /* SCQUAL_old_name, */
222   (SourceType) 0
223 };
224 
225 static SourceType source_feat_note_order [] = {
226   SCQUAL_unstructured,
227 
228   SCQUAL_metagenomic,
229 
230   SCQUAL_linkage_group,
231   SCQUAL_mating_type,
232 
233   SCQUAL_type,
234   SCQUAL_sub_type,
235   SCQUAL_serogroup,
236   SCQUAL_pathovar,
237   SCQUAL_chemovar,
238   SCQUAL_biovar,
239   SCQUAL_biotype,
240   SCQUAL_group,
241   SCQUAL_sub_group,
242   SCQUAL_common,
243   SCQUAL_acronym,
244   SCQUAL_dosage,
245 
246   SCQUAL_authority,
247   SCQUAL_forma,
248   SCQUAL_forma_specialis,
249   SCQUAL_synonym,
250   SCQUAL_anamorph,
251   SCQUAL_teleomorph,
252   SCQUAL_breed,
253   SCQUAL_frequency,
254 
255   SCQUAL_metagenome_source,
256   SCQUAL_metagenome_note,
257 
258   SCQUAL_genotype,
259   SCQUAL_plastid_name,
260 
261   SCQUAL_endogenous_virus_name,
262 
263   SCQUAL_seqfeat_note,
264   SCQUAL_orgmod_note,
265   SCQUAL_subsource_note,
266 
267   SCQUAL_common_name,
268 
269   SCQUAL_PCR_primer_note,
270   SCQUAL_PCR_reaction,
271 
272   SCQUAL_zero_orgmod,
273   SCQUAL_one_orgmod,
274   SCQUAL_zero_subsrc,
275 
276   /* SCQUAL_old_lineage, */
277 
278   /* SCQUAL_old_name, */
279   (SourceType) 0
280 };
281 
282 NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
283   { "",                         Qual_class_ignore     },
284   { "acronym",                  Qual_class_orgmod     },
285   { "altitude",                 Qual_class_subsource  },
286   { "anamorph",                 Qual_class_orgmod     },
287   { "authority",                Qual_class_orgmod     },
288   { "biotype",                  Qual_class_orgmod     },
289   { "biovar",                   Qual_class_orgmod     },
290   { "bio_material",             Qual_class_voucher    },
291   { "breed",                    Qual_class_orgmod     },
292   { "cell_line",                Qual_class_subsource  },
293   { "cell_type",                Qual_class_subsource  },
294   { "chemovar",                 Qual_class_orgmod     },
295   { "chromosome",               Qual_class_subsource  },
296   { "citation",                 Qual_class_pubset     },
297   { "clone",                    Qual_class_subsource  },
298   { "clone_lib",                Qual_class_subsource  },
299   { "collected_by",             Qual_class_subsource  },
300   { "collection_date",          Qual_class_subsource  },
301   { "common",                   Qual_class_orgmod     },
302   { "common",                   Qual_class_string     },
303   { "country",                  Qual_class_subsource  },
304   { "cultivar",                 Qual_class_orgmod     },
305   { "culture_collection",       Qual_class_voucher    },
306   { "db_xref",                  Qual_class_db_xref    },
307   { "db_xref",                  Qual_class_db_xref    },
308   { "dev_stage",                Qual_class_subsource  },
309   { "dosage",                   Qual_class_orgmod     },
310   { "ecotype",                  Qual_class_orgmod     },
311   { "endogenous_virus",         Qual_class_subsource  },
312   { "environmental_sample",     Qual_class_subsource  },
313   { "extrachromosomal",         Qual_class_boolean    },
314   { "focus",                    Qual_class_boolean    },
315   { "forma",                    Qual_class_orgmod     },
316   { "forma_specialis",          Qual_class_orgmod     },
317   { "frequency",                Qual_class_subsource  },
318   { "fwd_primer_name",          Qual_class_subsource  },
319   { "fwd_primer_seq",           Qual_class_subsource  },
320   { "gb_acronym",               Qual_class_orgmod     },
321   { "gb_anamorph",              Qual_class_orgmod     },
322   { "gb_synonym",               Qual_class_orgmod     },
323   { "genotype",                 Qual_class_subsource  },
324   { "germline",                 Qual_class_subsource  },
325   { "group",                    Qual_class_orgmod     },
326   { "haplogroup",               Qual_class_subsource  },
327   { "haplotype",                Qual_class_subsource  },
328   { "identified_by",            Qual_class_subsource  },
329   { "insertion_seq",            Qual_class_subsource  },
330   { "isolate",                  Qual_class_orgmod     },
331   { "isolation_source",         Qual_class_subsource  },
332   { "lab_host",                 Qual_class_subsource  },
333   { "label",                    Qual_class_label      },
334   { "lat_lon",                  Qual_class_lat_lon    },
335   { "linkage_group",            Qual_class_subsource  },
336   { "macronuclear",             Qual_class_boolean    },
337   { "map",                      Qual_class_subsource  },
338   { "mating_type",              Qual_class_subsource  },
339   { "derived from metagenome",  Qual_class_orgmod     },
340   { "metagenome_source",        Qual_class_orgmod     },
341   { "metagenomic",              Qual_class_subsource  },
342   { "mol_type",                 Qual_class_string     },
343   { "note",                     Qual_class_note       },
344   { "old_lineage",              Qual_class_orgmod     },
345   { "old_name",                 Qual_class_orgmod     },
346   { "organism",                 Qual_class_string     },
347   { "organelle",                Qual_class_organelle  },
348   { "orgmod_note",              Qual_class_orgmod     },
349   { "pathovar",                 Qual_class_orgmod     },
350   { "PCR_primers",              Qual_class_pcr        },
351   { "PCR_primers",              Qual_class_pcr        },
352   { "PCR_primers",              Qual_class_pcr_react  },
353   { "phenotype",                Qual_class_subsource  },
354   { "plasmid",                  Qual_class_subsource  },
355   { "plastid",                  Qual_class_subsource  },
356   { "pop_variant",              Qual_class_subsource  },
357   { "rearranged",               Qual_class_subsource  },
358   { "rev_primer_name",          Qual_class_subsource  },
359   { "rev_primer_seq",           Qual_class_subsource  },
360   { "segment",                  Qual_class_subsource  },
361   { "seqfeat_note",             Qual_class_string     },
362   { "sequenced_mol",            Qual_class_quote      },
363   { "serogroup",                Qual_class_orgmod     },
364   { "serotype",                 Qual_class_orgmod     },
365   { "serovar",                  Qual_class_orgmod     },
366   { "sex",                      Qual_class_subsource  },
367   { "host",                     Qual_class_orgmod     },
368   { "specimen_voucher",         Qual_class_voucher    },
369   { "strain",                   Qual_class_orgmod     },
370   { "sub_clone",                Qual_class_subsource  },
371   { "subgroup",                 Qual_class_orgmod     },
372   { "sub_species",              Qual_class_orgmod     },
373   { "sub_strain",               Qual_class_orgmod     },
374   { "subtype",                  Qual_class_orgmod     },
375   { "subsource_note",           Qual_class_subsource  },
376   { "synonym",                  Qual_class_orgmod     },
377   { "teleomorph",               Qual_class_orgmod     },
378   { "tissue_lib",               Qual_class_subsource  },
379   { "tissue_type",              Qual_class_subsource  },
380   { "transgenic",               Qual_class_subsource  },
381   { "transposon",               Qual_class_subsource  },
382   { "type",                     Qual_class_orgmod     },
383   { "type_material",            Qual_class_orgmod     },
384   { "unstructured",             Qual_class_valnode    },
385   { "usedin",                   Qual_class_quote      },
386   { "variety",                  Qual_class_orgmod     },
387   { "whole_replicon",           Qual_class_subsource  },
388   { "?",                        Qual_class_orgmod     },
389   { "?",                        Qual_class_orgmod     },
390   { "?",                        Qual_class_subsource  }
391 };
392 
393 NLM_EXTERN SourceType subSourceToSourceIdx [45] = {
394   SCQUAL_zero_subsrc,
395   SCQUAL_chromosome,
396   SCQUAL_map,
397   SCQUAL_clone,
398   SCQUAL_sub_clone,
399   SCQUAL_haplotype,
400   SCQUAL_genotype,
401   SCQUAL_sex,
402   SCQUAL_cell_line,
403   SCQUAL_cell_type,
404   SCQUAL_tissue_type,
405   SCQUAL_clone_lib,
406   SCQUAL_dev_stage,
407   SCQUAL_frequency,
408   SCQUAL_germline,
409   SCQUAL_rearranged,
410   SCQUAL_lab_host,
411   SCQUAL_pop_variant,
412   SCQUAL_tissue_lib,
413   SCQUAL_plasmid_name,
414   SCQUAL_transposon_name,
415   SCQUAL_ins_seq_name,
416   SCQUAL_plastid_name,
417   SCQUAL_country,
418   SCQUAL_segment,
419   SCQUAL_endogenous_virus_name,
420   SCQUAL_transgenic,
421   SCQUAL_environmental_sample,
422   SCQUAL_isolation_source,
423   SCQUAL_lat_lon,
424   SCQUAL_collection_date,
425   SCQUAL_collected_by,
426   SCQUAL_identified_by,
427   SCQUAL_fwd_primer_seq,
428   SCQUAL_rev_primer_seq,
429   SCQUAL_fwd_primer_name,
430   SCQUAL_rev_primer_name,
431   SCQUAL_metagenomic,
432   SCQUAL_mating_type,
433   SCQUAL_linkage_group,
434   SCQUAL_haplogroup,
435   SCQUAL_whole_replicon,
436   SCQUAL_phenotype,
437   SCQUAL_altitude,
438   SCQUAL_subsource_note
439 };
440 
441 /* ********************************************************************** */
442 
443 /* ********************************************************************** */
444 
445 /* format functions allocate printable string for given paragraph */
446 
447 /* superset of https://www.ncbi.nlm.nih.gov/collab/db_xref.html and RefSeq db_xrefs */
448 
449 NLM_EXTERN CharPtr legalDbXrefs [] = {
450   "AceView/WormGenes",
451   "AFTOL",
452   "AntWeb",
453   "APHIDBASE",
454   "ApiDB",
455   "ApiDB_CryptoDB",
456   "ApiDB_PlasmoDB",
457   "ApiDB_ToxoDB",
458   "Araport",
459   "ASAP",
460   "ATCC",
461   "ATCC(in host)",
462   "ATCC(dna)",
463   "Axeldb",
464   "BDGP_EST",
465   "BDGP_INS",
466   "BEEBASE",
467   "BEETLEBASE",
468   "BEI",
469   "BGD",
470   "BOLD",
471   "CDD",
472   "CGD",
473   "CK",
474   "COG",
475   "dbClone",
476   "dbCloneLib",
477   "dbEST",
478   "dbProbe",
479   "dbSNP",
480   "dbSTS",
481   "dictyBase",
482   "DSM",
483   "DSMZ",
484   "EcoGene",
485   "ENSEMBL",
486   "EnsemblGenomes",
487   "EnsemblGenomes-Gn",
488   "EnsemblGenomes-Tr",
489   "ERIC",
490   "ESTLIB",
491   "FANTOM_DB",
492   "FBOL",
493   "FLYBASE",
494   "GABI",
495   "GDB",
496   "GeneDB",
497   "GeneID",
498   "GO",
499   "GOA",
500   "Greengenes",
501   "GRIN",
502   "H-InvDB",
503   "HGNC",
504   "HMP",
505   "HOMD",
506   "HSSP",
507   "I5KNAL",
508   "IKMC",
509   "IMGT/GENE-DB",
510   "IMGT/HLA",
511   "IMGT/LIGM",
512   "InterimID",
513   "InterPro",
514   "IntrepidBio",
515   "IRD",
516   "ISD",
517   "ISFinder",
518   "ISHAM-ITS",
519   "JCM",
520   "JGIDB",
521   "LocusID",
522   "MaizeGDB",
523   "MedGen",
524   "MGI",
525   "MIM",
526   "miRBase",
527   "MycoBank",
528   "NBRC",
529   "NextDB",
530   "niaEST",
531   "NMPDR",
532   "NRESTdb",
533   "OrthoMCL",
534   "Osa1",
535   "Pathema",
536   "PBmice",
537   "PDB",
538   "PFAM",
539   "PGN",
540   "Phytozome",
541   "PIR",
542   "PomBase",
543   "PSEUDO",
544   "PseudoCap",
545   "RAP-DB",
546   "RATMAP",
547   "RFAM",
548   "RGD",
549   "RiceGenes",
550   "RZPD",
551   "SEED",
552   "SGD",
553   "SGN",
554   "SoyBase",
555   "SRPDB",
556   "SubtiList",
557   "TAIR",
558   "taxon",
559   "TIGRFAM",
560   "TubercuList",
561   "UniGene",
562   "UNILIB",
563   "UniProtKB/Swiss-Prot",
564   "UniProtKB/TrEMBL",
565   "UniSTS",
566   "UNITE",
567   "VBASE2",
568   "VectorBase",
569   "Vega",
570   "VGNC",
571   "ViPR",
572   "VISTA",
573   "WorfDB",
574   "WormBase",
575   "Xenbase",
576   "ZFIN",
577   NULL
578 };
579 
580 NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
581   "AFTOL",
582   "AntWeb",
583   "ATCC",
584   "ATCC(dna)",
585   "ATCC(in host)",
586   "BEI",
587   "BOLD",
588   "DSM",
589   "DSMZ",
590   "FANTOM_DB",
591   "FBOL",
592   "FLYBASE",
593   "Fungorum",
594   "Greengenes",
595   "GRIN",
596   "HMP",
597   "HOMD",
598   "IKMC",
599   "IMGT/HLA",
600   "IMGT/LIGM",
601   "ISHAM-ITS",
602   "JCM",
603   "MGI",
604   "MycoBank",
605   "NBRC",
606   "RBGE_garden",
607   "RBGE_herbarium",
608   "RZPD",
609   "taxon",
610   "UNILIB",
611   "UNITE",
612   NULL
613 };
614 
615 NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
616   "BioProject",
617   "BioSample",
618   "CCDS",
619   "CGNC",
620   "CloneID",
621   "CollecTF",
622   "ECOCYC",
623   "GenBank",
624   "HPM",
625   "HPRD",
626   "LRG",
627   "NASONIABASE",
628   "PBR",
629   "REBASE",
630   "RefSeq",
631   "SK-FST",
632   "VBRC",
633   NULL
634 };
635 
IsDbxrefInList(CharPtr name,CharPtr PNTR list,size_t num,BoolPtr badcapP,CharPtr PNTR goodcapP)636 static Boolean IsDbxrefInList (
637   CharPtr name,
638   CharPtr PNTR list,
639   size_t num,
640   BoolPtr badcapP,
641   CharPtr PNTR goodcapP
642 )
643 
644 {
645   Int2  L, R, mid;
646 
647   L = 0;
648   R = num;
649 
650   while (L < R) {
651     mid = (L + R) / 2;
652     if (StringICmp (list [mid], name) < 0) {
653       L = mid + 1;
654     } else {
655       R = mid;
656     }
657   }
658 
659   if (StringICmp (list [R], name) == 0) {
660     if (StringCmp (list [R], name) != 0) {
661       if (badcapP != NULL) {
662         *badcapP = TRUE;
663       }
664       if (goodcapP != NULL) {
665         *goodcapP = list [R];
666       }
667     }
668     return TRUE;
669   }
670 
671   return FALSE;
672 }
673 
DbxrefIsValid(CharPtr name,BoolPtr is_refseq_P,BoolPtr is_source_P,BoolPtr is_badcap_P,CharPtr PNTR goodcapP)674 NLM_EXTERN Boolean DbxrefIsValid (
675   CharPtr name,
676   BoolPtr is_refseq_P,
677   BoolPtr is_source_P,
678   BoolPtr is_badcap_P,
679   CharPtr PNTR goodcapP
680 )
681 
682 {
683   if (is_refseq_P != NULL) {
684     *is_refseq_P = FALSE;
685   }
686   if (is_source_P != NULL) {
687     *is_source_P = FALSE;
688   }
689   if (is_badcap_P != NULL) {
690     *is_badcap_P = FALSE;
691   }
692   if (goodcapP != NULL) {
693     *goodcapP = NULL;
694   }
695 
696   if (StringHasNoText (name)) return FALSE;
697 
698   if (IsDbxrefInList (name, legalRefSeqDbXrefs,
699                       sizeof (legalRefSeqDbXrefs) / sizeof (legalRefSeqDbXrefs [0]) - 1,
700                       is_badcap_P, goodcapP)) {
701     if (is_refseq_P != NULL) {
702       *is_refseq_P = TRUE;
703     }
704     return TRUE;
705   }
706 
707   if (IsDbxrefInList (name, legalSrcDbXrefs,
708                       sizeof (legalSrcDbXrefs) / sizeof (legalSrcDbXrefs [0]) - 1,
709                       is_badcap_P, goodcapP)) {
710     if (is_source_P != NULL) {
711       *is_source_P = TRUE;
712     }
713     return TRUE;
714   }
715 
716   if (IsDbxrefInList (name, legalDbXrefs,
717                       sizeof (legalDbXrefs) / sizeof (legalDbXrefs [0]) - 1,
718                       is_badcap_P, goodcapP)) {
719     return TRUE;
720   }
721 
722   return FALSE;
723 }
724 
725 
726 /* These functions are for testing dbxrefs */
727 
MakeDbxrefList(void)728 static ValNodePtr MakeDbxrefList (void)
729 {
730   ValNodePtr dbxref_list = NULL;
731   Int4 i;
732   DbtagPtr dbtag;
733 
734   for (i = 0; legalDbXrefs [i] != NULL; i++) {
735     dbtag = DbtagNew ();
736     dbtag->db = StringSave (legalDbXrefs [i]);
737     dbtag->tag = ObjectIdNew ();
738     dbtag->tag->id = 42;
739     ValNodeAddPointer (&dbxref_list, 0, dbtag);
740   }
741 
742   /* legalSrcDbXrefs is contained within legalDbXrefs */
743 
744   for (i = 0; legalRefSeqDbXrefs [i] != NULL; i++) {
745     dbtag = DbtagNew ();
746     dbtag->db = StringSave (legalRefSeqDbXrefs [i]);
747     dbtag->tag = ObjectIdNew ();
748     dbtag->tag->id = 42;
749     ValNodeAddPointer (&dbxref_list, 0, dbtag);
750   }
751 
752   return dbxref_list;
753 }
754 
/*
 * AddDbxrefsToBioSource
 *
 * Appends the list produced by MakeDbxrefList to biop->org->db, creating
 * the OrgRef first when the BioSource has none.  Does nothing when biop
 * is NULL or when the OrgRef cannot be allocated.
 */
static void AddDbxrefsToBioSource (BioSourcePtr biop)
{
  OrgRefPtr  orp;

  if (biop == NULL) return;

  orp = biop->org;
  if (orp == NULL) {
    orp = OrgRefNew ();
    /* allocation failed: nothing to attach the db_xrefs to */
    if (orp == NULL) return;
    biop->org = orp;
  }

  ValNodeLink (&(orp->db), MakeDbxrefList ());
}
765 
/*
 * AddDbxrefsToSeqFeat
 *
 * Appends the list produced by MakeDbxrefList to the feature's dbxref
 * chain.  A NULL feature is ignored.
 */
static void AddDbxrefsToSeqFeat (SeqFeatPtr sfp)
{
  ValNodePtr  extras;

  if (sfp != NULL) {
    extras = MakeDbxrefList ();
    ValNodeLink (&(sfp->dbxref), extras);
  }
}
771 
AddAllDbxrefsToBioseq(BioseqPtr bsp)772 NLM_EXTERN void AddAllDbxrefsToBioseq (BioseqPtr bsp)
773 {
774   SeqDescrPtr sdp;
775   SeqFeatPtr  sfp;
776   SeqMgrDescContext dcontext;
777   SeqMgrFeatContext fcontext;
778 
779   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
780   if (sdp != NULL) {
781     AddDbxrefsToBioSource (sdp->data.ptrvalue);
782   }
783 
784   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
785   if (sfp != NULL) {
786     AddDbxrefsToBioSource (sfp->data.value.ptrvalue);
787     AddDbxrefsToSeqFeat (sfp);
788   }
789 
790   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
791   if (sfp != NULL) {
792     AddDbxrefsToSeqFeat (sfp);
793   }
794 
795   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
796   if (sfp != NULL) {
797     AddDbxrefsToSeqFeat (sfp);
798   }
799 }
800 
801 
802 
803 static CharPtr organellePrefix [] = {
804   NULL,
805   NULL,
806   "Chloroplast ",
807   "Chromoplast ",
808   "Kinetoplast ",
809   "Mitochondrion ",
810   "Plastid ",
811   NULL,
812   NULL,
813   NULL,
814   NULL,
815   NULL,
816   "Cyanelle ",
817   NULL,
818   NULL,
819   "Nucleomorph ",
820   "Apicoplast ",
821   "Leucoplast ",
822   "Proplastid ",
823   NULL,
824   "Hydrogenosome ",
825   NULL,
826   "Chromatophore "
827 };
828 
829 static CharPtr newOrganellePrefix [] = {
830   NULL,
831   NULL,
832   "chloroplast ",
833   "chromoplast ",
834   "kinetoplast ",
835   "mitochondrion ",
836   "plastid ",
837   NULL,
838   NULL,
839   NULL,
840   NULL,
841   NULL,
842   "cyanelle ",
843   NULL,
844   NULL,
845   "nucleomorph ",
846   "apicoplast ",
847   "leucoplast ",
848   "proplastid ",
849   NULL,
850   "hydrogenosome ",
851   NULL,
852   "chromatophore "
853 };
854 
FormatSourceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)855 NLM_EXTERN CharPtr FormatSourceBlock (
856   Asn2gbFormatPtr afp,
857   BaseBlockPtr bbp
858 )
859 
860 {
861   CharPtr            acr = NULL;
862   Boolean            addPeriod = TRUE;
863   IntAsn2gbJobPtr    ajp;
864   CharPtr            ana = NULL;
865   Asn2gbSectPtr      asp;
866   BioSourcePtr       biop = NULL;
867   CharPtr            com = NULL;
868   CharPtr            common = NULL;
869   SeqMgrDescContext  dcontext;
870   SeqMgrFeatContext  fcontext;
871   CharPtr            gbacr = NULL;
872   CharPtr            gbana = NULL;
873   GBBlockPtr         gbp = NULL;
874   GBSeqPtr           gbseq;
875   CharPtr            gbsyn = NULL;
876   Uint1              genome;
877   CharPtr            met = NULL;
878   ValNodePtr         mod = NULL;
879   Int2               numacr = 0;
880   Int2               numana = 0;
881   Int2               numcom = 0;
882   Int2               numgbacr = 0;
883   Int2               numgbana = 0;
884   Int2               numgbsyn = 0;
885   Int2               nummet = 0;
886   Int2               numsyn = 0;
887   OrgModPtr          omp = NULL;
888   OrgNamePtr         onp;
889   CharPtr            organelle = NULL;
890   OrgRefPtr          orp;
891   CharPtr            prefix = " (";
892   SeqDescrPtr        sdp;
893   CharPtr            second = NULL;
894   SeqFeatPtr         sfp;
895   CharPtr            str;
896   CharPtr            syn = NULL;
897   CharPtr            taxname = NULL;
898   StringItemPtr      ffstring, temp;
899 
900   if (afp == NULL || bbp == NULL) return NULL;
901   ajp = afp->ajp;
902   if (ajp == NULL) return NULL;
903   asp = afp->asp;
904   if (asp == NULL) return NULL;
905 
906   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
907 
908   ffstring = FFGetString(ajp);
909   if ( ffstring == NULL ) return NULL;
910 
911   if (bbp->itemtype == OBJ_SEQDESC) {
912     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
913     if (sdp != NULL) {
914       if (dcontext.seqdesctype == Seq_descr_source) {
915         biop = (BioSourcePtr) sdp->data.ptrvalue;
916       } else if (dcontext.seqdesctype == Seq_descr_genbank) {
917         gbp = (GBBlockPtr) sdp->data.ptrvalue;
918       }
919     }
920   } else if (bbp->itemtype == OBJ_SEQFEAT) {
921     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
922     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
923       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
924     }
925   }
926   if (gbp != NULL) {
927     common = gbp->source;
928   }
929 
930   if (biop != NULL) {
931     genome = biop->genome;
932     if (genome <= 22) {
933       if (ajp->newSourceOrg && (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT)) {
934         organelle = newOrganellePrefix [genome];
935       } else {
936         organelle = organellePrefix [genome];
937       }
938     }
939     orp = biop->org;
940     if (orp != NULL) {
941       taxname = orp->taxname;
942       common = orp->common;
943       mod = orp->mod;
944       onp = orp->orgname;
945       if (onp != NULL) {
946 
947         if (ajp->newSourceOrg) {
948           for (omp = onp->mod; omp != NULL; omp = omp->next) {
949             switch (omp->subtype) {
950               case ORGMOD_common :
951                 com = omp->subname;
952                 numcom++;
953                 break;
954               case ORGMOD_acronym :
955                 acr = omp->subname;
956                 numacr++;
957                 break;
958               case ORGMOD_synonym :
959                 syn = omp->subname;
960                 numsyn++;
961                 break;
962               case ORGMOD_anamorph :
963                 ana = omp->subname;
964                 numana++;
965                 break;
966               case ORGMOD_gb_acronym :
967                 gbacr = omp->subname;
968                 numgbacr++;
969                 break;
970               case ORGMOD_gb_anamorph :
971                 gbana = omp->subname;
972                 numgbana++;
973                 break;
974               case ORGMOD_gb_synonym :
975                 gbsyn = omp->subname;
976                 numgbsyn++;
977                 break;
978               case ORGMOD_metagenome_source :
979                 met = omp->subname;
980                 nummet++;
981                 break;
982               default :
983                 break;
984             }
985           }
986 
987           if (numacr > 1) {
988              acr = NULL;
989           }
990           if (numana > 1) {
991              ana = NULL;
992           }
993           if (numcom > 1) {
994              com = NULL;
995           }
996           if (nummet > 1) {
997              met = NULL;
998           }
999           if (numsyn > 1) {
1000              syn = NULL;
1001           }
1002           if (numgbacr > 1) {
1003              gbacr = NULL;
1004           }
1005           if (numgbana > 1) {
1006              gbana = NULL;
1007           }
1008           if (numgbsyn > 1) {
1009              gbsyn = NULL;
1010           }
1011 
1012           if (StringHasNoText (second)) {
1013             second = met;
1014           }
1015           if (StringHasNoText (second)) {
1016             second = syn;
1017           }
1018            if (StringHasNoText (second)) {
1019              second = acr;
1020           }
1021           if (StringHasNoText (second)) {
1022             if (StringDoesHaveText (ana)) {
1023               second = ana;
1024               prefix = " (anamorph: ";
1025             }
1026           }
1027           if (StringHasNoText (second)) {
1028             second = com;
1029           }
1030 
1031           if (StringHasNoText (second)) {
1032             second = gbsyn;
1033           }
1034           if (StringHasNoText (second)) {
1035             second = gbacr;
1036           }
1037           if (StringHasNoText (second)) {
1038             if (StringDoesHaveText (gbana)) {
1039               second = gbana;
1040               prefix = " (anamorph: ";
1041             }
1042           }
1043         }
1044       }
1045       if (StringHasNoText (second)) {
1046         second = common;
1047       }
1048     }
1049   }
1050 
1051   /* If the organelle prefix is already on the */
1052   /* name, don't add it.                       */
1053 
1054   if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0)
1055     organelle = "";
1056 
1057   if (StringHasNoText (common)) {
1058     common = taxname;
1059   }
1060   if (StringHasNoText (common)) {
1061     common = "Unknown.";
1062   }
1063   if (StringHasNoText (taxname)) {
1064     taxname = "Unknown.";
1065   }
1066 
1067   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1068 
1069     temp = FFGetString(ajp);
1070 
1071     if (ajp->newSourceOrg) {
1072 
1073       if (! StringHasNoText (organelle)) {
1074         FFAddTextToString(temp, NULL, organelle, NULL, FALSE, FALSE, TILDE_IGNORE);
1075       }
1076       FFAddTextToString(temp, NULL, taxname, NULL, FALSE, FALSE, TILDE_IGNORE);
1077       if (! StringHasNoText (second)) {
1078         FFAddTextToString(temp, prefix, second, ")", FALSE, FALSE, TILDE_IGNORE);
1079       }
1080       addPeriod = FALSE;
1081 
1082     } else {
1083       FFAddTextToString(temp, NULL, common, NULL, FALSE, FALSE, TILDE_IGNORE);
1084       while (mod != NULL) {
1085         str = (CharPtr) mod->data.ptrvalue;
1086         if (! StringHasNoText (str)) {
1087           FFAddTextToString(temp, " ", str, NULL, FALSE, FALSE, TILDE_IGNORE);
1088         }
1089         mod = mod->next;
1090       }
1091     }
1092 
1093     str = FFToCharPtr(temp);
1094     if (StringCmp (str, ".") == 0) {
1095       str = MemFree (str);
1096     }
1097     FFRecycleString(ajp, temp);
1098     /* optionally populate gbseq for XML-ized GenBank format */
1099 
1100     if (ajp->gbseq) {
1101       gbseq = &asp->gbseq;
1102     } else {
1103       gbseq = NULL;
1104     }
1105 
1106     if (gbseq != NULL) {
1107       gbseq->source = StringSave (str);
1108     }
1109 
1110 
1111     FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1112     if (str != NULL) {
1113       FFAddTextToString(ffstring, NULL, str, NULL, addPeriod, FALSE, TILDE_TO_SPACES);
1114     } else {
1115       FFAddOneChar(ffstring, '.', FALSE);
1116     }
1117 
1118     MemFree (str);
1119 
1120   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1121 
1122     FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1123     FFAddTextToString(ffstring, NULL, taxname, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1124     if ( StringICmp(taxname, common) != 0 ) {
1125         FFAddTextToString(ffstring, " (", common, ")", FALSE, FALSE, TILDE_TO_SPACES);
1126     }
1127   }
1128 
1129   str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 0, 5, "OS");
1130   FFRecycleString(ajp, ffstring);
1131   return str;
1132 }
1133 
FormatOrganismBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)1134 NLM_EXTERN CharPtr FormatOrganismBlock (
1135   Asn2gbFormatPtr afp,
1136   BaseBlockPtr bbp
1137 )
1138 
1139 {
1140   IntAsn2gbJobPtr    ajp;
1141   Asn2gbSectPtr      asp;
1142   BioSourcePtr       biop = NULL;
1143   Char               ch;
1144   CharPtr            common = NULL;
1145   DbtagPtr           dbt;
1146   SeqMgrDescContext  dcontext;
1147   SeqMgrFeatContext  fcontext;
1148   GBSeqPtr           gbseq;
1149   Uint1              genome;
1150   CharPtr            lineage = NULL;
1151   ObjectIdPtr        oip;
1152   OrgModPtr          omp;
1153   OrgNamePtr         onp;
1154   CharPtr            organelle = NULL;
1155   OrgRefPtr          orp;
1156   SeqDescrPtr        sdp;
1157   SeqFeatPtr         sfp;
1158   CharPtr            str;
1159   Int4               taxid = -1;
1160   CharPtr            taxname = NULL;
1161   CharPtr            tmp;
1162   CharPtr            ptr;
1163   ValNodePtr         vnp;
1164   StringItemPtr      ffstring, temp;
1165   Char               buf [16];
1166 
1167   if (afp == NULL || bbp == NULL) return NULL;
1168   ajp = afp->ajp;
1169   if (ajp == NULL) return NULL;
1170   asp = afp->asp;
1171   if (asp == NULL) return NULL;
1172 
1173 
1174   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
1175 
1176   if (bbp->itemtype == OBJ_SEQDESC) {
1177     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1178     if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
1179       biop = (BioSourcePtr) sdp->data.ptrvalue;
1180     }
1181   } else if (bbp->itemtype == OBJ_SEQFEAT) {
1182     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1183     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
1184       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1185     }
1186   }
1187   if (biop != NULL) {
1188     genome = biop->genome;
1189     if (genome <= 22) {
1190       organelle = organellePrefix [genome];
1191     }
1192     orp = biop->org;
1193     if (orp != NULL) {
1194       taxname = orp->taxname;
1195       common = orp->common;
1196       onp = orp->orgname;
1197       if (onp != NULL) {
1198         lineage = onp->lineage;
1199         if (StringHasNoText (lineage)) {
1200           for (omp = onp->mod; omp != NULL; omp = omp->next) {
1201             if (omp->subtype == ORGMOD_old_lineage) {
1202               lineage = omp->subname;
1203             }
1204           }
1205         }
1206       }
1207       for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
1208         dbt = (DbtagPtr) vnp->data.ptrvalue;
1209         if (dbt == NULL) continue;
1210         if (StringCmp (dbt->db, "taxon") == 0) {
1211           oip = dbt->tag;
1212           if (oip != NULL) {
1213             taxid = oip->id;
1214           }
1215         }
1216       }
1217     }
1218   }
1219 
1220   /* If the organelle prefix is already on the */
1221   /* name, don't add it.                       */
1222 
1223   if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
1224     organelle = "";
1225 
1226   if (StringHasNoText (common)) {
1227     common = taxname;
1228   }
1229   if (StringHasNoText (common)) {
1230     common = "Unknown.";
1231   }
1232   if (StringHasNoText (taxname)) {
1233     taxname = "Unknown.";
1234   }
1235   if (StringHasNoText (lineage)) {
1236     lineage = "Unclassified.";
1237   }
1238 
1239   ffstring = FFGetString(ajp);
1240   temp = FFGetString(ajp);
1241   if ( ffstring == NULL || temp == NULL ) return NULL;
1242 
1243   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1244 
1245     FFStartPrint(temp, afp->format, 2, 12, "ORGANISM", 12, 5, 5, "OC", FALSE);
1246     if (! ajp->newSourceOrg) {
1247       FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1248     }
1249     if (StringNICmp (taxname, "Unknown", 7) != 0) {
1250       if ( GetWWW(ajp) ) {
1251         if (taxid != -1) {
1252           FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1253           FF_Add_NCBI_Base_URL (temp, link_tax);
1254           FFAddOneString(temp, "id=", FALSE, FALSE, TILDE_IGNORE);
1255           sprintf (buf, "%ld", (long) taxid);
1256           FFAddOneString(temp, buf, FALSE, FALSE, TILDE_IGNORE);
1257           FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1258         } else {
1259           FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1260           FF_Add_NCBI_Base_URL (temp, link_tax);
1261           FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE);
1262           tmp = StringSave (taxname);
1263           if (tmp != NULL) {
1264             ptr = tmp;
1265             ch = *ptr;
1266             while (ch != '\0') {
1267               if (IS_WHITESP (ch)) {
1268                 *ptr = '+';
1269               }
1270               ptr++;
1271               ch = *ptr;
1272             }
1273             FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE);
1274             MemFree (tmp);
1275           }
1276           FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1277         }
1278         FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1279         FFAddOneString(temp, "</a>", FALSE, FALSE, TILDE_IGNORE);
1280       } else {
1281         FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1282       }
1283     } else {
1284       FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1285     }
1286     FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1287     FFRecycleString(ajp, temp);
1288 
1289     temp = FFGetString(ajp);
1290     FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1291     FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1292     FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1293     FFRecycleString(ajp, temp);
1294     /* optionally populate gbseq for XML-ized GenBank format */
1295 
1296     if (ajp->gbseq) {
1297       gbseq = &asp->gbseq;
1298     } else {
1299       gbseq = NULL;
1300     }
1301 
1302     if (gbseq != NULL) {
1303       temp = FFGetString(ajp);
1304       if (! ajp->newSourceOrg) {
1305         FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1306       }
1307       FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1308       gbseq->organism = FFToCharPtr(temp);
1309       gbseq->taxonomy = StringSave (lineage);
1310       FFRecycleString(ajp, temp);
1311     }
1312 
1313   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1314     FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1315     FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1316     FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
1317     FFRecycleString(ajp, temp);
1318     if ( !StringHasNoText(organelle) ) {
1319       temp = FFGetString(ajp);
1320       if ( temp != NULL ) {
1321         FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE);
1322         FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1323         FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
1324         FFRecycleString(ajp, temp);
1325       }
1326     }
1327   }
1328 
1329   str = FFToCharPtr(ffstring);
1330   FFRecycleString(ajp, ffstring);
1331   return str;
1332 }
1333 
/* A tilde is not an EOL if it is found in a string of the form:    */
/* /~alphanumdot/ where alphanumdot consists of alphanumeric        */
/* characters, '_', '-' or '.'.                                     */
/*                                                                  */
/* str points to the tilde in question.                             */
IsTildeEOL(CharPtr str)1338 static Boolean IsTildeEOL(CharPtr str) {
1339   CharPtr ptr;
1340 
1341   if ( *(str - 1) != '/' ) return TRUE;
1342 
1343   ++str;
1344 
1345 
1346   for ( ptr = str;
1347     IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
1348     ++ptr) continue;
1349 
1350   return *ptr == '/' ? FALSE : TRUE;
1351 }
1352 
1353 /* returns a pointer to the first character past the url */
FindUrlEnding(CharPtr str)1354 static CharPtr FindUrlEnding(CharPtr str) {
1355   CharPtr ptr;
1356 
1357   for ( ptr = str;
1358         !IS_WHITESP(*ptr) && *ptr != '\0' && *ptr != '(' && *ptr != '\"';
1359         ++ptr  ) {
1360     if ( *ptr == '~' ) {
1361       if ( IsTildeEOL(ptr) ) break;
1362     }
1363   }
1364 
1365   --ptr;
1366 
1367   /* back up over any trailing periods, commas, or parentheses */
1368   while ( (*ptr == '.') || (*ptr == ',') || (*ptr == ')') ) {
1369     --ptr;
1370   }
1371 
1372   ++ptr;
1373 
1374   return ptr;
1375 }
1376 
CommentHasSuspiciousHtml(IntAsn2gbJobPtr ajp,CharPtr searchString)1377 NLM_EXTERN Boolean CommentHasSuspiciousHtml (
1378   IntAsn2gbJobPtr ajp,
1379   CharPtr searchString
1380 )
1381 
1382 {
1383   Char        ch;
1384   CharPtr     ptr;
1385   Int4        state;
1386   ValNodePtr  matches;
1387 
1388   if (StringHasNoText (searchString)) return FALSE;
1389 
1390   state = 0;
1391   ptr = searchString;
1392   ch = *ptr;
1393 
1394   while (ch != '\0') {
1395     matches = NULL;
1396     ch = TO_LOWER (ch);
1397     state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
1398     if (matches != NULL) {
1399       return TRUE;
1400     }
1401     ptr++;
1402     ch = *ptr;
1403   }
1404 
1405   return FALSE;
1406 }
1407 
AddCommentWithURLlinks(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr prefix,CharPtr str,CharPtr suffix)1408 NLM_EXTERN void AddCommentWithURLlinks (
1409   IntAsn2gbJobPtr ajp,
1410   StringItemPtr ffstring,
1411   CharPtr prefix,
1412   CharPtr str,
1413   CharPtr suffix
1414 )
1415 
1416 {
1417   Char     ch;
1418   CharPtr  ptr;
1419 
1420   if (GetWWW (ajp) && CommentHasSuspiciousHtml (ajp, str)) {
1421     if (prefix != NULL) {
1422       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1423     }
1424     AddCommentStringWithTildes (ffstring, str);
1425     if (suffix != NULL) {
1426       FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1427     }
1428     return;
1429   }
1430 
1431   /*
1432   if (GetWWW (ajp)) {
1433     str = EncodeXmlEx (str);
1434   }
1435   */
1436 
1437   while (! StringHasNoText (str)) {
1438     ptr = StringStr (str, "http://");
1439     if (ptr == NULL) {
1440       ptr = StringStr (str, "https://");
1441     }
1442     if (ptr == NULL) {
1443       if (prefix != NULL) {
1444         FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1445       }
1446       AddCommentStringWithTildes (ffstring, str);
1447       if (suffix != NULL) {
1448         FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1449       }
1450       return;
1451     }
1452 
1453     *ptr = '\0';
1454     AddCommentStringWithTildes (ffstring, str);
1455     *ptr = 'h';
1456 
1457     str = ptr;
1458     ptr = FindUrlEnding(str);
1459 
1460 
1461     ch = *ptr;
1462     *ptr = '\0';
1463     if ( GetWWW(ajp) ) {
1464       FFAddTextToString(ffstring, "<a href=\"", str, "\">", FALSE, FALSE, TILDE_IGNORE);
1465       FFAddTextToString(ffstring, NULL, str, "</a>", FALSE, FALSE, TILDE_IGNORE);
1466     } else {
1467       FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
1468     }
1469 
1470     *ptr = ch;
1471     str = ptr;
1472   }
1473 }
1474 
/*
 * StrucCommentFFEndPrint
 *
 * Line-wraps the contents of ffstring into a scratch string and returns
 * the result of FFToCharPtr on it.  GenBank / GenPept use the gb_* indents
 * and a width of ASN2FF_GB_MAX - 12; every other format uses the eb_*
 * indents, eb_line_prefix and a width of ASN2FF_EMBL_MAX - 5.
 *
 * ffstring itself is not recycled here.  Returns NULL when ajp or ffstring
 * is NULL or when no scratch string could be obtained.
 */
static CharPtr StrucCommentFFEndPrint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  FmtType format,
  Int2 gb_init_indent,
  Int2 gb_cont_indent,
  Int2 eb_init_indent,
  Int2 eb_cont_indent,
  CharPtr eb_line_prefix
)
{
  StringItemPtr temp;
  CharPtr result;

  /* validate before taking a scratch string, so that nothing */
  /* is left checked out on the early return                  */
  if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;

  temp = FFGetString(ajp);
  if ( temp == NULL ) return NULL;

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {
    FFLineWrap (ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL);
  } else {
    FFLineWrap (ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix);
  }
  result = FFToCharPtr (temp);
  FFRecycleString (ajp, temp);
  return result;
}
1500 
/*
 * Structured comments are laid out in two columns to make them look
 * pretty, but once the first column gets too wide the printout turns
 * ugly.  This returns the first-column width beyond which the columnar
 * layout is given up: 45, or the usable line width of the format if
 * that is smaller.
 */
static size_t ThresholdForStructuredCommentColumnarDisplay (
  FmtType format
)
{
  const size_t widest_pretty_column = 45;

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {
    return MIN( widest_pretty_column, ASN2FF_GB_MAX - 12 );
  }

  return MIN( widest_pretty_column, ASN2FF_EMBL_MAX - 5 );
}
1520 
GetStrForStructuredComment(IntAsn2gbJobPtr ajp,UserObjectPtr uop)1521 NLM_EXTERN CharPtr GetStrForStructuredComment (
1522   IntAsn2gbJobPtr ajp,
1523   UserObjectPtr uop
1524 )
1525 
1526 {
1527   Char           buf [132];
1528   Char           ch;
1529   UserFieldPtr   curr;
1530   StringItemPtr  ffstring;
1531   CharPtr        field;
1532   ValNodePtr     head = NULL;
1533   size_t         len;
1534   CharPtr        link_annot_tmp;
1535   size_t         max = 0;
1536   ObjectIdPtr    oip;
1537   CharPtr        prefix = NULL;
1538   CharPtr        provider = NULL;
1539   CharPtr        ptr;
1540   CharPtr        status = NULL;
1541   CharPtr        str;
1542   CharPtr        suffix = NULL;
1543   CharPtr        tmp;
1544 
1545   if (ajp == NULL || uop == NULL) return NULL;
1546   if ((oip = uop->type) == NULL) return NULL;
1547   if (StringCmp (oip->str, "StructuredComment") != 0) return NULL;
1548 
1549   ffstring = FFGetString (ajp);
1550   if (ffstring == NULL) return NULL;
1551 
1552   for (curr = uop->data; curr != NULL; curr = curr->next) {
1553    if (curr->choice != 1) continue;
1554     oip = curr->label;
1555     if (oip == NULL) continue;
1556     field = oip->str;
1557     if (StringHasNoText (field)) continue;
1558     if (StringCmp (field, "StructuredCommentPrefix") == 0) {
1559       str = (CharPtr) curr->data.ptrvalue;
1560       if (StringDoesHaveText (str)) {
1561         prefix = str;
1562       }
1563       continue;
1564     }
1565     if (StringCmp (field, "StructuredCommentSuffix") == 0) {
1566       str = (CharPtr) curr->data.ptrvalue;
1567       if (StringDoesHaveText (str)) {
1568         suffix = str;
1569       }
1570       continue;
1571     }
1572     if (StringCmp (field, "Annotation Provider") == 0) {
1573       str = (CharPtr) curr->data.ptrvalue;
1574       if (StringDoesHaveText (str)) {
1575         provider = str;
1576       }
1577     } else if (StringCmp (field, "Annotation Status") == 0) {
1578       str = (CharPtr) curr->data.ptrvalue;
1579       if (StringDoesHaveText (str)) {
1580         status = str;
1581       }
1582     }
1583     len = StringLen (field);
1584     if (len > max) {
1585       max = len;
1586     }
1587   }
1588 
1589   if (StringHasNoText (prefix)) {
1590     prefix = "##Metadata-START##";
1591   }
1592   if (StringHasNoText (suffix)) {
1593     suffix = "##Metadata-END##";
1594   }
1595 
1596   if (StringDoesHaveText (prefix)) {
1597     tmp = (CharPtr) MemNew (StringLen (prefix) + 4);
1598     if (tmp != NULL) {
1599       sprintf (tmp, "%s\n", prefix);
1600       ValNodeAddStr (&head, 0, tmp);
1601     }
1602   }
1603   if (max > ThresholdForStructuredCommentColumnarDisplay (ajp->format)) {
1604     for (curr = uop->data; curr != NULL; curr = curr->next) {
1605      if (curr->choice != 1) continue;
1606       oip = curr->label;
1607       if (oip == NULL) continue;
1608       field = oip->str;
1609       if (StringHasNoText (field)) continue;
1610       if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1611       if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1612       str = (CharPtr) curr->data.ptrvalue;
1613       if (StringHasNoText (str)) continue;
1614       ValNodeCopyStr (&head, 0, field);
1615       /*
1616       ValNodeCopyStr (&head, 0, " ");
1617       */
1618       ValNodeCopyStr (&head, 0, " :: ");
1619       ValNodeCopyStr (&head, 0, str);
1620       ValNodeCopyStr (&head, 0, "\n");
1621     }
1622   } else {
1623     for (curr = uop->data; curr != NULL; curr = curr->next) {
1624      if (curr->choice != 1) continue;
1625       oip = curr->label;
1626       if (oip == NULL) continue;
1627       field = oip->str;
1628       if (StringHasNoText (field)) continue;
1629       if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1630       if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1631       str = (CharPtr) curr->data.ptrvalue;
1632       if (StringHasNoText (str)) continue;
1633       len = max + StringLen (str) + 4;
1634       /*
1635       FFStartPrint (ffstring, GENBANK_FMT, 0, max + 1, field, max + 1, 0, max + 1, field, TRUE);
1636       */
1637       StringNCpy_0 (buf, field, sizeof (buf) - 40);
1638       StringCat (buf, "                                         ");
1639       buf [max + 1] = ':';
1640       buf [max + 2] = ':';
1641       buf [max + 3] = '\0';
1642       FFStartPrint (ffstring, GENBANK_FMT, 0, max + 4, buf, max + 4, 0, max + 4, buf, TRUE);
1643 
1644       if (GetWWW (ajp) && StringCmp (field, "GOLD Stamp ID") == 0 && StringNCmp (str, "Gi", 2) == 0) {
1645         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1646         FF_Add_NCBI_Base_URL (ffstring, link_gold_stamp_id);
1647         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1648         /* FFAddOneString (ffstring, ".html", FALSE, FALSE, TILDE_EXPAND); */
1649         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1650         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1651         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1652 
1653       } else if (GetWWW (ajp) &&
1654         StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
1655         StringCmp (field, "Annotation Software Version") == 0) {
1656         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1657         FF_Add_NCBI_Base_URL (ffstring, link_annot_soft_ver);
1658         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1659         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1660         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1661         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1662 
1663       } else if (GetWWW (ajp) &&
1664         StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
1665         StringCmp (field, "Annotation Version") == 0 &&
1666         StringCmp (provider, "NCBI") == 0 &&
1667         StringCmp (status, "Full annotation") == 0) {
1668         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1669         FF_Add_NCBI_Base_URL (ffstring, link_annot_ver);
1670         link_annot_tmp = StringSave (str);
1671         if (link_annot_tmp != NULL) {
1672           ptr = StringStr (link_annot_tmp, " Annotation Release ");
1673           if (ptr != NULL) {
1674             *ptr = '\0';
1675             StringCat (link_annot_tmp, "/");
1676             ptr += 20;
1677             StringCat (link_annot_tmp, ptr);
1678             ptr = link_annot_tmp;
1679             ch = *ptr;
1680             while (ch != '\0') {
1681               if (ch == ' ') {
1682                 *ptr = '_';
1683               }
1684               ptr++;
1685               ch = *ptr;
1686             }
1687           }
1688           FFAddOneString (ffstring, link_annot_tmp, FALSE, FALSE, TILDE_EXPAND);
1689           MemFree (link_annot_tmp);
1690         }
1691         FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1692         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1693         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1694 
1695       } else if (GetWWW (ajp) && StringCmp (field, "url") == 0) {
1696         AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1697       } else if (GetWWW (ajp) && StringNICmp (str, "http://", 7) == 0) {
1698         AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1699       } else if (GetWWW (ajp) && StringNICmp (str, "https://", 8) == 0) {
1700         AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1701       } else {
1702         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1703       }
1704       /*
1705       FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
1706       */
1707       /*
1708       tmp = StrucCommentFFEndPrint (ajp, ffstring, ajp->format, max + 1, max + 1, 0, max + 1, NULL);
1709       */
1710       tmp = StrucCommentFFEndPrint (ajp, ffstring, ajp->format, max + 4, max + 4, 0, max + 4, NULL);
1711       ValNodeCopyStr (&head, 0, tmp);
1712       MemFree (tmp);
1713       FFRecycleString (ajp, ffstring);
1714       ffstring = FFGetString (ajp);
1715       /*
1716       tmp = (CharPtr) MemNew (len);
1717       if (tmp == NULL) continue;
1718       StringCpy (tmp, field);
1719       len = StringLen (tmp);
1720       while (len < max) {
1721         tmp [len] = ' ';
1722         len++;
1723       }
1724       tmp [len] = '\0';
1725       StringCat (tmp, " ");
1726       StringCat (tmp, str);
1727       StringCat (tmp, "\n");
1728       ValNodeCopyStr (&head, 0, tmp);
1729       MemFree (tmp);
1730       */
1731     }
1732   }
1733   if (StringDoesHaveText (suffix)) {
1734     tmp = (CharPtr) MemNew (StringLen (suffix) + 4);
1735     if (tmp != NULL) {
1736       sprintf (tmp, "%s\n", suffix);
1737       ValNodeAddStr (&head, 0, tmp);
1738     }
1739   }
1740 
1741   if (head == NULL) return NULL;
1742 
1743   str = MergeFFValNodeStrs (head);
1744   ValNodeFreeData (head);
1745 
1746   FFRecycleString (ajp, ffstring);
1747 
1748   return str;
1749 }
1750 
GetStructuredCommentTable(IntAsn2gbJobPtr ajp,UserObjectPtr uop)1751 static CharPtr GetStructuredCommentTable (
1752   IntAsn2gbJobPtr ajp,
1753   UserObjectPtr uop
1754 )
1755 
1756 {
1757   UserFieldPtr  curr;
1758   CharPtr       field;
1759   ValNodePtr    head = NULL;
1760   ObjectIdPtr   oip;
1761   CharPtr       prefix = NULL;
1762   CharPtr       str;
1763   CharPtr       suffix = NULL;
1764 
1765   if (ajp == NULL || uop == NULL) return NULL;
1766   if ((oip = uop->type) == NULL) return NULL;
1767   if (StringCmp (oip->str, "StructuredComment") != 0) return NULL;
1768 
1769   for (curr = uop->data; curr != NULL; curr = curr->next) {
1770    if (curr->choice != 1) continue;
1771     oip = curr->label;
1772     if (oip == NULL) continue;
1773     field = oip->str;
1774     if (StringHasNoText (field)) continue;
1775     if (StringCmp (field, "StructuredCommentPrefix") == 0) {
1776       str = (CharPtr) curr->data.ptrvalue;
1777       if (StringDoesHaveText (str)) {
1778         prefix = str;
1779       }
1780       continue;
1781     }
1782     if (StringCmp (field, "StructuredCommentSuffix") == 0) {
1783       str = (CharPtr) curr->data.ptrvalue;
1784       if (StringDoesHaveText (str)) {
1785         suffix = str;
1786       }
1787       continue;
1788     }
1789   }
1790 
1791   if (StringHasNoText (prefix)) {
1792     prefix = "##Metadata-START##";
1793   }
1794   if (StringHasNoText (suffix)) {
1795     suffix = "##Metadata-END##";
1796   }
1797 
1798   if (StringDoesHaveText (prefix)) {
1799     ValNodeCopyStr (&head, 0, prefix);
1800     if (ajp->oldXmlPolicy) {
1801       ValNodeCopyStr (&head, 0, "\n");
1802     } else {
1803       ValNodeCopyStr (&head, 0, "\\n");
1804     }
1805   }
1806 
1807   for (curr = uop->data; curr != NULL; curr = curr->next) {
1808    if (curr->choice != 1) continue;
1809     oip = curr->label;
1810     if (oip == NULL) continue;
1811     field = oip->str;
1812     if (StringHasNoText (field)) continue;
1813     if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1814     if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1815     str = (CharPtr) curr->data.ptrvalue;
1816     if (StringHasNoText (str)) continue;
1817     ValNodeCopyStr (&head, 0, field);
1818     if (ajp->oldXmlPolicy) {
1819       ValNodeCopyStr (&head, 0, "\t");
1820     } else {
1821       ValNodeCopyStr (&head, 0, "\\t");
1822     }
1823     ValNodeCopyStr (&head, 0, str);
1824     if (ajp->oldXmlPolicy) {
1825       ValNodeCopyStr (&head, 0, "\n");
1826     } else {
1827       ValNodeCopyStr (&head, 0, "\\n");
1828     }
1829   }
1830 
1831   if (StringDoesHaveText (suffix)) {
1832     ValNodeCopyStr (&head, 0, suffix);
1833     if (ajp->oldXmlPolicy) {
1834       ValNodeCopyStr (&head, 0, "\n");
1835     } else {
1836       ValNodeCopyStr (&head, 0, "\\n");
1837     }
1838   }
1839 
1840   if (head == NULL) return NULL;
1841 
1842   str = MergeFFValNodeStrs (head);
1843   ValNodeFreeData (head);
1844 
1845   return str;
1846 }
1847 
/*
 * Counts the characters in str that are a newline, carriage return, tab,
 * tilde or backslash.  A NULL str counts as zero.
 */
static size_t CountSlashableChars (
  CharPtr str
)

{
  size_t   total = 0;
  CharPtr  scan;

  if (str == NULL) return 0;

  for (scan = str; *scan != '\0'; scan++) {
    switch (*scan) {
      case '\n' :
      case '\r' :
      case '\t' :
      case '~' :
      case '\\' :
        total++;
        break;
      default :
        break;
    }
  }

  return total;
}
1869 
/*
 * CatenateCommentInGbseq
 *
 * Appends one comment to gbseq->comment.
 *
 *   - A leading "COMMENT     " label is skipped.
 *   - Newlines, carriage returns and tabs become blanks, and the text is
 *     passed through Asn2gnbkCompressSpaces when compress is set.
 *   - Unless ajp->oldXmlPolicy is set, every tilde becomes the two
 *     characters "\n" (blanks right after it are kept), other runs of
 *     blanks collapse to one, and a backslash is doubled when
 *     protectSlash is set.
 *   - Comments are joined with "; " under the old XML policy and with the
 *     two characters "\r" otherwise.
 *
 * str itself is not modified.  Nothing happens when ajp or gbseq is NULL,
 * when str has no text, or when memory runs out.
 */
static void CatenateCommentInGbseq (
  IntAsn2gbJobPtr ajp,
  GBSeqPtr gbseq,
  CharPtr str,
  Boolean compress,
  Boolean protectSlash
)

{
  Char     ch;
  CharPtr  cpy, dst, ptr, src, tmp;

  if (ajp == NULL || gbseq == NULL || StringHasNoText (str)) return;

  if (StringNCmp (str, "COMMENT     ", 12) == 0) {
    str += 12;
  }

  cpy = StringSave (str);
  if (cpy == NULL) return;

  /* line breaks and tabs turn into plain blanks */

  ptr = cpy;
  ch = *ptr;
  while (ch != '\0') {
    if (ch == '\n' || ch == '\r' || ch == '\t') {
      *ptr = ' ';
    }
    ptr++;
    ch = *ptr;
  }

  if (compress) {
    Asn2gnbkCompressSpaces (cpy);
  }

  if (! ajp->oldXmlPolicy) {
    /* each tilde or backslash may grow by one character */
    tmp = (CharPtr) MemNew (StringLen (cpy) + CountSlashableChars (cpy) + 10);
    if (tmp == NULL) {
      MemFree (cpy);
      return;
    }

    dst = tmp;
    src = cpy;
    ch = *src;
    while (ch != '\0') {
      if (ch == '~') {
        /* tilde becomes an escaped newline, indentation after it is kept */
        *dst = '\\';
        dst++;
        *dst = 'n';
        dst++;
        src++;
        ch = *src;
        while (ch == ' ') {
          *dst = ch;
          dst++;
          src++;
          ch = *src;
        }
      } else if (ch == ' ') {
        /* any other run of blanks collapses to a single blank */
        *dst = ch;
        dst++;
        src++;
        ch = *src;
        while (ch == ' ') {
          src++;
          ch = *src;
        }
      } else if (ch == '\\' && protectSlash) {
        *dst = '\\';
        dst++;
        *dst = '\\';
        dst++;
        src++;
        ch = *src;
      } else {
        *dst = ch;
        dst++;
        src++;
        ch = *src;
      }
    }
    *dst = '\0';

    MemFree (cpy);
    cpy = tmp;
  }

  if (gbseq->comment == NULL) {
    /* first comment: gbseq takes over the copy */
    gbseq->comment = cpy;
  } else {
    tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (cpy) + 10);
    if (tmp == NULL) {
      MemFree (cpy);
      return;
    }
    StringCpy (tmp, gbseq->comment);
    if (ajp->oldXmlPolicy) {
      StringCat (tmp, "; ");
    } else {
      StringCat (tmp, "\\r");
    }
    StringCat (tmp, cpy);
    MemFree (cpy);
    gbseq->comment = MemFree (gbseq->comment);
    gbseq->comment = tmp;
  }
}
1972 
/*
 * CommentTildes
 *
 * Applies a fixed list of text replacements to *str through
 * FindReplaceString, in the order listed.  Most of them drop a "~~" that
 * follows a particular phrase or url.  Compiled to a no-op when OS_MSWIN
 * is defined.
 */
static void CommentTildes (
  CharPtr PNTR str
)

{
#ifndef OS_MSWIN
  static struct {
    CharPtr  find;
    CharPtr  replace;
  } fixes [] = {
    {"nnotated by GenomeRefine~~", "nnotated by GenomeRefine"},
    {"based on SOLiD3 (Applied Biosystems)~~", "based on SOLiD3 (Applied Biosystems)"},
    {"Biological resourse center, NITE (NRBC)~~", "Biological resourse center, NITE (NRBC)"},
    {"developmental01.html~~", "developmental01.html"},
    {"http://bionano.toyo.ac.jp/~~", "http://bionano.toyo.ac.jp/"},
    {"http://dictycdb1.biol.tsukuba.ac.jp/acytodb/~~", "http://dictycdb1.biol.tsukuba.ac.jp/acytodb/"},
    {"http://egg.umh.es~~", "http://egg.umh.es"},
    {"http://www.aist.go.jp/~~", "http://www.aist.go.jp/"},
    {"http://www.bio.nite.go.jp/~~DOGAN ; Database", "http://www.bio.nite.go.jp/\n            \nDOGAN ; Database"},
    {"http://www.bio.nite.go.jp/ngac/e/~~", "http://www.bio.nite.go.jp/ngac/e/"},
    {"http://www.brs.kyushu-u.ac.jp/~fcmic/~~", "http://www.brs.kyushu-u.ac.jp/~fcmic/"},
    {"http://www.miyazaki-u.ac.jp/ir/english/index.html~~", "http://www.miyazaki-u.ac.jp/ir/english/index.html"},
    {"URL:http://www.bio.nite.go.jp/~~", "URL:http://www.bio.nite.go.jp/"},
    {"RAST version 2.0 (http://rast.nmpdr.org/)~~", "RAST version 2.0 (http://rast.nmpdr.org/)"},
    {"URL:http://www.tmd.ac.jp/grad/bac/database.html~~", "URL:http://www.tmd.ac.jp/grad/bac/database.html"}
  };
  size_t  idx;

  for (idx = 0; idx < sizeof (fixes) / sizeof (fixes [0]); idx++) {
    FindReplaceString (str, fixes [idx].find, fixes [idx].replace, FALSE, FALSE);
  }
#endif
}
1996 
FormatCommentBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)1997 NLM_EXTERN CharPtr FormatCommentBlock (
1998   Asn2gbFormatPtr afp,
1999   BaseBlockPtr bbp
2000 )
2001 
2002 {
2003   Boolean            add_period;
2004   IntAsn2gbJobPtr    ajp;
2005   Asn2gbSectPtr      asp;
2006   Boolean            as_string = FALSE;
2007   Boolean            blank_before = FALSE;
2008   CommentBlockPtr    cbp;
2009   Char               ch;
2010   SeqMgrDescContext  dcontext;
2011   CharPtr            db;
2012   DbtagPtr           dbt;
2013   Boolean            do_gbseq = TRUE;
2014   SeqMgrFeatContext  fcontext;
2015   GBSeqPtr           gbseq;
2016   size_t             len;
2017   ObjectIdPtr        oip;
2018   CharPtr            prefix;
2019   SeqDescrPtr        sdp;
2020   SeqFeatPtr         sfp;
2021   Char               sfx [32];
2022   CharPtr            str;
2023   CharPtr            struc_comm_title = NULL;
2024   CharPtr            suffix;
2025   CharPtr            title;
2026   UserObjectPtr      uop = NULL;
2027   StringItemPtr      ffstring;
2028 
2029   if (afp == NULL || bbp == NULL) return NULL;
2030   ajp = afp->ajp;
2031   if (ajp == NULL) return NULL;
2032   asp = afp->asp;
2033   if (asp == NULL) return NULL;
2034 
2035   cbp = (CommentBlockPtr) bbp;
2036 
2037   /* optionally populate gbseq for XML-ized GenBank format */
2038 
2039   if (ajp->gbseq) {
2040     gbseq = &asp->gbseq;
2041   } else {
2042     gbseq = NULL;
2043   }
2044 
2045   /* some comments are allocated (along with possible first COMMENT label) */
2046 
2047   if (! StringHasNoText (bbp->string)) {
2048     str = StringSave (bbp->string);
2049     CatenateCommentInGbseq (ajp, gbseq, str, TRUE, FALSE);
2050     return str;
2051   }
2052 
2053   title = NULL;
2054   prefix = NULL;
2055   suffix = NULL;
2056   add_period = FALSE;
2057   sfx [0] = '\0';
2058 
2059   if (bbp->itemtype == OBJ_SEQDESC) {
2060 
2061     /* usually should reference comment, maploc, or region descriptor IDs */
2062 
2063     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
2064     if (sdp != NULL) {
2065 
2066       if (dcontext.seqdesctype == Seq_descr_comment) {
2067 
2068         title = (CharPtr) sdp->data.ptrvalue;
2069 
2070       } else if (dcontext.seqdesctype == Seq_descr_maploc) {
2071 
2072         dbt = (DbtagPtr) sdp->data.ptrvalue;
2073         if (dbt != NULL) {
2074           db = dbt->db;
2075           oip = dbt->tag;
2076           if (oip != NULL) {
2077             if (oip->str != NULL) {
2078 
2079               title = oip->str;
2080               prefix = ("Map location: ");
2081 
2082             } else if (db != NULL && oip->id != 0) {
2083 
2084               title = db;
2085               prefix = ("Map location: (Database ");
2086               sprintf (sfx, "; id # %ld).", (long) oip->id);
2087               suffix = sfx;
2088 
2089             }
2090           }
2091         }
2092 
2093       } else if (dcontext.seqdesctype == Seq_descr_region) {
2094 
2095         title = (CharPtr) sdp->data.ptrvalue;
2096         prefix = "Region: ";
2097 
2098       } else if (dcontext.seqdesctype == Seq_descr_name) {
2099 
2100         title = (CharPtr) sdp->data.ptrvalue;
2101         prefix = "Name: ";
2102 
2103       } else if (dcontext.seqdesctype == Seq_descr_user) {
2104 
2105         uop = (UserObjectPtr) sdp->data.ptrvalue;
2106         if (uop != NULL) {
2107           title = GetStrForStructuredComment (ajp, uop);
2108           if (title != NULL) {
2109             struc_comm_title = title;
2110             str = GetStructuredCommentTable (ajp, uop);
2111             CatenateCommentInGbseq (ajp, gbseq, str, TRUE, FALSE);
2112             MemFree (str);
2113             blank_before = TRUE;
2114             as_string = TRUE;
2115             do_gbseq = FALSE;
2116           }
2117         }
2118 
2119       }
2120     }
2121 
2122   } else if (bbp->itemtype == OBJ_SEQFEAT) {
2123 
2124     /* also have to deal with comment feature across entire sequence */
2125 
2126     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
2127     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_COMMENT) {
2128 
2129       title = sfp->comment;
2130     }
2131   }
2132 
2133   if (title == NULL) return NULL;
2134 
2135   ffstring = FFGetString(ajp);
2136   if ( ffstring == NULL ) return NULL;
2137 
2138   if (cbp->first) {
2139     FFStartPrint (ffstring, afp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
2140   } else {
2141     FFStartPrint (ffstring, afp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
2142     if (blank_before) {
2143       if (! cbp->no_blank_before) {
2144         FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
2145       }
2146     }
2147   }
2148 
2149   str = StringSave (title);
2150 
2151   if (StringDoesHaveText (str)) {
2152     CommentTildes (&str);
2153   }
2154 
2155   TrimSpacesAndJunkFromEnds (str, TRUE);
2156 
2157   /* remove trailing double tilde */
2158   /*
2159   len = StringLen (str);
2160   if (len > 5 && str [len-1] == '~' && str [len-2] == '~') {
2161     str [len-2] = '\0';
2162   }
2163   */
2164 
2165   if (as_string) {
2166     FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
2167   } else {
2168     if (! IsEllipsis (str)) {
2169       s_RemovePeriodFromEnd (str);
2170       len = StringLen (str);
2171       if (len > 0) {
2172         ch = str [len - 1];
2173         if (ch != '.' && ch != '/' && ch != '~') {
2174           add_period = TRUE;
2175         }
2176       }
2177     }
2178     AddCommentWithURLlinks(ajp, ffstring, prefix, str, suffix);
2179     if (add_period) {
2180       FFAddOneChar (ffstring, '.', FALSE);
2181     }
2182   }
2183 
2184   MemFree (str);
2185 
2186   str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 5, 5, "CC");
2187 
2188   if (do_gbseq) {
2189     CatenateCommentInGbseq (ajp, gbseq, title, ajp->oldXmlPolicy, TRUE);
2190   }
2191 
2192   FFRecycleString(ajp, ffstring);
2193 
2194   MemFree (struc_comm_title);
2195 
2196   return str;
2197 }
2198 
2199 /* format features section */
2200 
is_real_id(SeqIdPtr sip,SeqIdPtr this_sip)2201 static Boolean is_real_id (
2202   SeqIdPtr sip,
2203   SeqIdPtr this_sip
2204 )
2205 
2206 {
2207   BioseqPtr  bsp;
2208 
2209   if (sip == NULL || this_sip == NULL) return FALSE;
2210 
2211   if (! SeqIdIn (sip, this_sip)) {
2212     bsp = BioseqFind (sip);
2213     if (bsp == NULL) return TRUE;  /* ??? */
2214     if (bsp->repr == Seq_repr_virtual) return FALSE;
2215   }
2216 
2217   return TRUE;
2218 }
2219 
FlatVirtLoc(BioseqPtr bsp,SeqLocPtr location)2220 static Boolean FlatVirtLoc (
2221   BioseqPtr bsp,
2222   SeqLocPtr location
2223 )
2224 
2225 {
2226   SeqIntPtr  sintp;
2227   SeqIdPtr   sip;
2228   SeqPntPtr  spp;
2229 
2230   if (bsp == NULL || location == NULL) return FALSE;
2231 
2232   switch (location->choice) {
2233     case SEQLOC_WHOLE :
2234       sip = (SeqIdPtr) location->data.ptrvalue;
2235       if (sip == NULL) return TRUE;
2236       if (! is_real_id (sip, bsp->id)) return TRUE;
2237       break;
2238     case SEQLOC_INT :
2239       sintp = (SeqIntPtr) location->data.ptrvalue;
2240       if (sintp == NULL) return TRUE;
2241       sip = sintp->id;
2242       if (sip == NULL) return TRUE;
2243       if (! is_real_id (sip, bsp->id)) return TRUE;
2244       break;
2245     case SEQLOC_PNT :
2246       spp = (SeqPntPtr) location->data.ptrvalue;
2247       if (spp == NULL) return TRUE;
2248       sip = spp->id;
2249       if (sip == NULL) return TRUE;
2250       if (! is_real_id (sip, bsp->id)) return TRUE;
2251       break;
2252     default :
2253       break;
2254   }
2255 
2256   return FALSE;
2257 }
2258 
/* Seq-id ranking table passed to SeqIdSelect; filled in once by FFFlatLoc */
static Uint1    id_order [NUM_SEQID];
/* set to TRUE after FFFlatLoc has populated id_order */
static Boolean  order_initialized = FALSE;

/* prefix printed before a position, indexed by fuzz->a for fuzz choice 4; */
/* callers map any index above 4 to entry 0 (no prefix) */
static CharPtr lim_str [5] = { "", ">","<", ">", "<" };
2263 
GetAccnVerFromServer(BIG_ID gi,CharPtr buf)2264 NLM_EXTERN Boolean GetAccnVerFromServer (BIG_ID gi, CharPtr buf)
2265 
2266 {
2267   AccnVerLookupFunc  func;
2268   SeqMgrPtr          smp;
2269   CharPtr            str;
2270 
2271   if (buf == NULL) return FALSE;
2272   *buf = '\0';
2273   smp = SeqMgrWriteLock ();
2274   if (smp == NULL) return FALSE;
2275   func = smp->accn_ver_lookup_func;
2276   SeqMgrUnlock ();
2277   if (func == NULL) return FALSE;
2278   str = (*func) (gi);
2279   if (str == NULL) return FALSE;
2280   if (StringLen (str) < 40) {
2281     StringCpy (buf, str);
2282   }
2283   MemFree (str);
2284   return TRUE;
2285 }
2286 
2287 
2288 /******************************************************************************/
2289 /*                              FFFlatLoc functions  .                          */
2290 /******************************************************************************/
2291 
FF_FlatNullAhead(BioseqPtr bsp,ValNodePtr location)2292 static Boolean FF_FlatNullAhead (
2293   BioseqPtr bsp,
2294   ValNodePtr location
2295 )
2296 
2297 {
2298   SeqLocPtr  next;
2299 
2300   if (bsp == NULL || location == NULL) return FALSE;
2301 
2302   next = location->next;
2303   if (next == NULL) return TRUE;
2304   if (next->choice == SEQLOC_NULL) return TRUE;
2305   if (FlatVirtLoc (bsp, next)) return TRUE;
2306 
2307   return FALSE;
2308 }
2309 
2310 
2311 
/*
 * FlatLocSeqId
 *
 * Appends the text form of sip followed by ":" to ffstring, for use as
 * the sequence qualifier in front of a location on another record.
 *
 *   ajp      - job record; relModeError is set when a GI or placeholder
 *              has to be printed (dereferenced without a NULL check)
 *   ffstring - output accumulator; nothing happens if NULL
 *   sip      - id to print; nothing happens if NULL
 *
 * The id is chosen from a loaded Bioseq via SeqIdSelect and id_order when
 * possible.  A GI that is not loaded is first resolved through
 * GetAccnVerFromServer, then GetSeqIdForGI, and finally by locking the
 * Bioseq.  If nothing printable results, "?00000" (or up to 13 characters
 * of a local id string) is written instead.
 */
static void FlatLocSeqId (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip
)

{
  BioseqPtr    bsp;
  Char         buf [40];
  ObjectIdPtr  oip;
  SeqIdPtr     use_id = NULL;
  Boolean      was_lock = FALSE;

  if (ffstring == NULL || sip == NULL) return;

  buf [0] = '\0';
  bsp = BioseqFind (sip);
  if (bsp != NULL) {
    use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
  } else if (sip->choice == SEQID_GI) {
    if (GetAccnVerFromServer (sip->data.intvalue, buf)) {
      FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
      return;
    }
    /* NOTE(review): ownership of the id returned by GetSeqIdForGI is not */
    /* visible here - confirm whether it needs to be freed */
    use_id = GetSeqIdForGI (sip->data.intvalue);
  }
  if (use_id == NULL && bsp == NULL) {
    bsp = BioseqLockById (sip);
    if (bsp != NULL) {

      /* only a lock that was actually obtained is released below */

      was_lock = TRUE;
      use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
    }
  }
  if (use_id != NULL) {
    SeqIdWrite (use_id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
    if (use_id->choice == SEQID_GI) {
      ajp->relModeError = TRUE;
    }
  } else if (sip->choice == SEQID_GI) {
    SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
    ajp->relModeError = TRUE;
  } else {

    /* sip cannot be a GI here, that case is handled just above */

    SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
  }
  if (StringHasNoText (buf)) {
    StringCpy (buf, "?00000");
    ajp->relModeError = TRUE;
    if (use_id != NULL && use_id->choice == SEQID_LOCAL) {
      oip = (ObjectIdPtr) use_id->data.ptrvalue;
      if (oip != NULL && (! StringHasNoText (oip->str))) {
        StringNCpy_0 (buf, oip->str, 13);
      }
    }
  }

  /* use_id can point into the locked Bioseq, so unlock only after its last use */

  if (was_lock) {
    BioseqUnlock (bsp);
  }
  FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
}
2375 
2376 
2377 
/*
 * FlatLocCaret
 *
 * Appends the text for a single point that carries fuzz, writing "a^b"
 * between-position notation for several fuzz kinds.  The zero-based
 * point is converted to one-based before printing.  When sip is not one
 * of this_sip's IDs, the position is preceded by the output of
 * FlatLocSeqId.
 *
 *   fuzz->choice 1 : "(p-a.p)..(p.p+a)" using fuzz->a as the spread
 *   fuzz->choice 2 : "b^a" from fuzz->b and fuzz->a, each plus one
 *   fuzz->choice 3 : "lo^hi" with fuzz->a applied as thousandths of p
 *   fuzz->choice 4 : fuzz->a of 3 gives "p^p+1"; 4 (with p > 1) gives
 *                    "p-1^p"; anything else gives a lim_str prefix and p
 *   otherwise      : the bare position
 */
static void FlatLocCaret (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip,
  SeqIdPtr this_sip,
  Int4 point,
  IntFuzzPtr fuzz
)

{
  int    kind;
  Uint1  lim;
  Char   text [128];

  if (ffstring == NULL) return;

  if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
    FlatLocSeqId (ajp, ffstring, sip);
  }

  text [0] = '\0';

  /* switch to one-based coordinates (callers pass the zero-based point) */

  point += 1;

  /* a missing fuzz is handled by the same branch as an unrecognized choice */

  kind = (fuzz != NULL) ? (int) fuzz->choice : 0;

  switch (kind) {
    case 1 :
      sprintf (text, "(%ld.%ld)..(%ld.%ld)",
               (long) (point - fuzz->a),
               (long) point,
               (long) point,
               (long) (point + fuzz->a));
      break;
    case 2 :
      sprintf (text, "%ld^%ld",
               (long) (1 + fuzz->b),
               (long) (1 + fuzz->a));
      break;
    case 3 :
      sprintf (text, "%ld^%ld",
               (long) (point - point * ((double) fuzz->a / 1000.0)),
               (long) (point + point * ((double) fuzz->a / 1000.0)));
      break;
    case 4 :
      if (fuzz->a == 3) {

        /* space to right */

        sprintf (text, "%ld^%ld", (long) (point), (long) (point + 1));

      } else if (fuzz->a == 4 && point > 1) {

        /* space to left */

        sprintf (text, "%ld^%ld", (long) (point - 1), (long) point);

      } else {
        lim = (Uint1) fuzz->a;
        sprintf (text, "%s%ld", lim_str [(lim > 4) ? 0 : lim], (long) point);
      }
      break;
    default :
      sprintf (text, "%ld", (long) point);
      break;
  }

  FFAddOneString(ffstring, text, FALSE, FALSE, TILDE_IGNORE);
}
2442 
2443 
/*
 * FlatLocPoint
 *
 * Appends the text for one endpoint of a location.  The zero-based point
 * is converted to one-based before printing.  When sip is not one of
 * this_sip's IDs, the position is preceded by the output of FlatLocSeqId.
 *
 *   fuzz->choice 1 : "(p-a.p+a)" using fuzz->a as the spread
 *   fuzz->choice 2 : "(b.a)" from fuzz->b and fuzz->a, each plus one
 *   fuzz->choice 3 : "(lo.hi)" with fuzz->a applied as thousandths of p
 *   fuzz->choice 4 : lim_str prefix selected by fuzz->a, then p
 *   otherwise      : the bare position
 */
static void FlatLocPoint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip,
  SeqIdPtr this_sip,
  Int4 point,
  IntFuzzPtr fuzz
)

{
  int    kind;
  Uint1  lim;
  Char   text [128];

  if (ffstring == NULL) return;

  if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
    FlatLocSeqId (ajp, ffstring, sip);
  }

  text [0] = '\0';

  /* switch to one-based coordinates */

  point += 1;

  /* a missing fuzz is handled by the same branch as an unrecognized choice */

  kind = (fuzz != NULL) ? (int) fuzz->choice : 0;

  switch (kind) {
    case 1 :
      sprintf (text, "(%ld.%ld)",
               (long) (point - fuzz->a),
               (long) (point + fuzz->a));
      break;
    case 2 :
      sprintf (text, "(%ld.%ld)",
               (long) (1 + fuzz->b),
               (long) (1 + fuzz->a));
      break;
    case 3 :
      sprintf (text, "(%ld.%ld)",
               (long) (point - point * ((double) fuzz->a / 1000.0)),
               (long) (point + point * ((double) fuzz->a / 1000.0)));
      break;
    case 4 :

      /* indexes beyond the lim_str table fall back to the empty prefix */

      lim = (Uint1) fuzz->a;
      if (lim > 4) {
        lim = 0;
      }
      sprintf (text, "%s%ld", lim_str [lim], (long) point);
      break;
    default :
      sprintf (text, "%ld", (long) point);
      break;
  }

  FFAddOneString(ffstring, text, FALSE, FALSE, TILDE_IGNORE);
}
2500 
2501 
/*
 * FlatLocElement
 *
 * Appends the text for one simple location component (whole, interval,
 * point, or bond) to ffstring.  Other location types print nothing.
 *
 *   ajp      - job record, passed through to the point formatters
 *   ffstring - output accumulator
 *   bsp      - Bioseq being formatted; ids not in bsp->id get an
 *              accession prefix from the point formatters
 *   location - the component to print
 *   isGap    - forces the "..to" half of an interval to be printed
 *
 * Whole, interval, and point components are printed only when is_real_id
 * accepts their Seq-id.  Minus-strand intervals and points are wrapped in
 * "complement(...)".
 */
static void FlatLocElement (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean isGap

)

{
  BioseqPtr   lenbsp;
  Boolean     minus_strand = FALSE;
  SeqBondPtr  sbp;
  SeqIntPtr   sintp;
  SeqIdPtr    sip;
  SeqPntPtr   spp;
  BioseqPtr   wholebsp;

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  switch (location->choice) {
    case SEQLOC_WHOLE :
      sip = (SeqIdPtr) location->data.ptrvalue;
      if (sip == NULL) return;
      wholebsp = BioseqFind (sip);
      if (wholebsp == NULL) return;
      if (is_real_id (sip, bsp->id)) {

        /* the printed range must span the sequence the location names, */
        /* which is bsp itself only when sip is one of bsp's own ids */

        lenbsp = SeqIdIn (sip, bsp->id) ? bsp : wholebsp;

        FlatLocPoint (ajp, ffstring, sip, bsp->id, 0, NULL);
        if (lenbsp->length > 0) {
          FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
          FlatLocPoint (ajp, ffstring, NULL, bsp->id, lenbsp->length - 1, NULL);
        }
      }
      break;
    case SEQLOC_INT :
      sintp = (SeqIntPtr) location->data.ptrvalue;
      if (sintp == NULL) return;
      sip = sintp->id;
      if (sip == NULL) return;
      if (is_real_id (sip, bsp->id)) {
        minus_strand = (Boolean) (sintp->strand == Seq_strand_minus);
        if (minus_strand) {
          FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
        }
        FlatLocPoint (ajp, ffstring, sip, bsp->id, sintp->from, sintp->if_from);

        /* the second endpoint is omitted for a plain single-base interval, */
        /* unless this is a gap, which always shows both ends */

        if ((sintp->to > 0 &&
             (sintp->to != sintp->from ||
              sintp->if_from != NULL ||
              sintp->if_to != NULL)) ||
            isGap) {
          FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
          FlatLocPoint (ajp, ffstring, NULL, bsp->id, sintp->to, sintp->if_to);
        }
        if (minus_strand) {
          FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
        }
      }
      break;
    case SEQLOC_PNT :
      spp = (SeqPntPtr) location->data.ptrvalue;
      if (spp == NULL) return;
      sip = spp->id;
      if (sip == NULL) return;
      if (is_real_id (sip, bsp->id)) {
        minus_strand = (Boolean) (spp->strand == Seq_strand_minus);
        if (minus_strand) {
          FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
        }

        /* a point with fuzz uses the caret formatter */

        if (spp->fuzz != NULL) {
          FlatLocCaret (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);
        } else {
          FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, NULL);
        }
        if (minus_strand) {
          FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
        }
      }
      break;
    case SEQLOC_BOND :
      sbp = (SeqBondPtr) location->data.ptrvalue;
      if (sbp == NULL) return;
      spp = sbp->a;
      if (spp == NULL) return;
      sip = spp->id;
      if (sip == NULL) return;
      FFAddOneString(ffstring, "bond(", FALSE, FALSE, TILDE_IGNORE);
      FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);

      /* the second end of the bond is optional */

      spp = sbp->b;
      if (spp != NULL) {
        FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
        FlatLocPoint (ajp, ffstring, NULL, bsp->id, spp->point, spp->fuzz);
      }
      FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
      break;
    default :
      /* unexpected internal complex type or unimplemented SEQLOC_FEAT */
      return;
  }
}
2600 
2601 
2602 
/*
 * FF_FlatPackedPoint
 *
 * Writes each of the pspp->used positions stored in pspp->pnts through
 * FlatLocPoint, using the shared pspp->id and pspp->fuzz.  Does nothing
 * if ffstring, pspp, or bsp is NULL.
 *
 * NOTE(review): the points are written back to back with no separator
 * between them, and isGap is not used - confirm this is intended.
 */
static void FF_FlatPackedPoint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  PackSeqPntPtr pspp,
  BioseqPtr bsp,
  Boolean isGap
)

{
  Uint1  idx;

  if (ffstring == NULL || pspp == NULL || bsp == NULL) {
    return;
  }

  idx = 0;
  while (idx < pspp->used) {
    FlatLocPoint (ajp, ffstring, pspp->id, bsp->id, pspp->pnts [idx], pspp->fuzz);
    idx++;
  }
}
2620 
2621 
/* forward declaration: FF_GroupFlatLoc below calls FF_DoFlatLoc for nested */
/* packed-int, mix, and equiv components, and FF_DoFlatLoc in turn calls */
/* FF_GroupFlatLoc */

static void FF_DoFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement,
  Boolean isGap
);
2630 
/*
 * FF_GroupFlatLoc
 *
 * Prints the children of a compound location (location->data.ptrvalue)
 * as a comma separated list inside the operator given by prefix.
 *
 *   prefix        - operator text including its opening parenthesis,
 *                   e.g. "join(" or "order("
 *   is_flat_order - when TRUE, an interval that is not followed by a
 *                   NULL or virtual component opens a nested "join(",
 *                   which is closed again at the next NULL or virtual
 *                   component that is not last in the chain
 *   isGap         - passed through to the element formatters
 *
 * NULL and virtual children are skipped.  All parentheses still open at
 * the end, including the one supplied by prefix, are closed before
 * returning.
 */
static void FF_GroupFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  CharPtr prefix,
  Boolean is_flat_order,
  Boolean isGap
)

{
  Boolean        found_non_virt = FALSE;
  SeqLocPtr      hold_next;
  Int2           parens = 1;
  PackSeqPntPtr  pspp;
  SeqLocPtr      slp;
  Boolean        special_mode = FALSE; /* join in order */

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  /* prefix will have the first parenthesis */

  FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);

  for (slp = (SeqLocPtr) location->data.ptrvalue; slp != NULL; slp = slp->next) {

    /* NULL or virtual component: print nothing, but end any nested join */

    if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) {
      if (slp != location && slp->next != NULL) {
        if (special_mode) {
          special_mode = FALSE;
          FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
          parens--;
        }
      }
      continue;
    }

    /* comma before every component after the first printable one */

    if (found_non_virt && slp->choice != SEQLOC_EMPTY) {
      FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
    }

    switch (slp->choice) {
      case SEQLOC_WHOLE :
      case SEQLOC_PNT :
      case SEQLOC_BOND :
      case SEQLOC_FEAT :
        found_non_virt = TRUE;

        /* virtual components were already skipped at the top of the loop */

        FlatLocElement (ajp, ffstring, bsp, slp, isGap);
        break;
      case SEQLOC_INT :
        found_non_virt = TRUE;
        if (is_flat_order && (! FF_FlatNullAhead (bsp, slp))) {
          special_mode = TRUE;
          FFAddOneString(ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
          parens++;
        }
        FlatLocElement (ajp, ffstring, bsp, slp, isGap);
        break;
      case SEQLOC_PACKED_PNT :
        found_non_virt = TRUE;
        pspp = (PackSeqPntPtr) slp->data.ptrvalue;
        if (pspp != NULL) {
          FF_FlatPackedPoint (ajp, ffstring, pspp, bsp, isGap);
        }
        break;
      case SEQLOC_PACKED_INT :
      case SEQLOC_MIX :
      case SEQLOC_EQUIV :
        found_non_virt = TRUE;

        /* detach temporarily so the recursive call sees only this component */

        hold_next = slp->next;
        slp->next = NULL;
        FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE, isGap);
        slp->next = hold_next;
        break;
      default :
        break;
    }

  }

  /* close the prefix parenthesis and any nested join still open */

  while (parens > 0) {
    FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
    parens--;
  }
}
2726 
2727 
2728 
2729 
/*
 * FF_DoFlatLoc
 *
 * Appends the flatfile text for a location (and any siblings linked
 * through next) to ffstring.
 *
 *   ok_to_complement - when TRUE and the whole location is on the minus
 *                      strand, a reverse-complemented copy is printed
 *                      inside "complement(...)"
 *   isGap            - passed through to the element formatters
 *
 * Mix and packed-int components are printed as "order(...)" when any
 * child is NULL or virtual, otherwise as "join(...)".  Equiv components
 * are printed as "one-of(...)".  NULL and virtual components are
 * skipped, and commas are written only between components that are
 * actually printed.
 */
static void FF_DoFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement,
  Boolean isGap

)

{
  Boolean        found_null;
  Boolean        need_comma = FALSE;
  SeqLocPtr      next_loc;
  PackSeqPntPtr  pspp;
  SeqLocPtr      slp;

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  /* deal with complement of entire location */

  if (ok_to_complement && SeqLocStrand (location) == Seq_strand_minus) {
    slp = AsnIoMemCopy ((Pointer) location,
                        (AsnReadFunc) SeqLocAsnRead,
                        (AsnWriteFunc) SeqLocAsnWrite);
    if (slp != NULL) {
      SeqLocRevCmp (slp);
      FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
      FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE, isGap);
      FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
    }
    SeqLocFree (slp);
    return;
  }

  /* handle each location component */

  for (slp = location; slp != NULL; slp = slp->next) {

    if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) continue;

    /* print comma between components; tracking the previous printed */
    /* component avoids a leading comma when earlier ones were skipped */

    if (need_comma) {
      FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
    }
    need_comma = TRUE;

    switch (slp->choice) {
      case SEQLOC_MIX :
      case SEQLOC_PACKED_INT :

        /* one NULL or virtual child is enough to select order() */

        found_null = FALSE;
        for (next_loc = (SeqLocPtr) slp->data.ptrvalue;
             next_loc != NULL && (! found_null);
             next_loc = next_loc->next) {
          if (next_loc->choice == SEQLOC_NULL ||
              FlatVirtLoc (bsp, next_loc)) {
            found_null = TRUE;
          }
        }
        if (found_null) {
          FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "order(", TRUE, isGap);
        } else {
          FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "join(", FALSE, isGap);
        }
        break;
      case SEQLOC_EQUIV :
        FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "one-of(", FALSE, isGap);
        break;
      case SEQLOC_PACKED_PNT :
        pspp = (PackSeqPntPtr) slp->data.ptrvalue;
        if (pspp != NULL) {
          FF_FlatPackedPoint (ajp, ffstring, pspp, bsp, isGap);
        }
        break;
      default :
        FlatLocElement (ajp, ffstring, bsp, slp, isGap);
        break;
    }

  }
}
2810 
/*
 * FF_DoFlatLocEx
 *
 * Wrapper around FF_DoFlatLoc.  With swapPartials FALSE the location is
 * printed as is.  With swapPartials TRUE a copy of the location is made,
 * its 5' and 3' partial flags are exchanged, the copy is printed, and
 * the copy is freed.  Nothing is printed if location is NULL or the copy
 * cannot be made.
 */
static void FF_DoFlatLocEx (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement,
  Boolean isGap,
  Boolean swapPartials
)

{
  SeqLocPtr  copy;
  Boolean    was5, was3;

  if (location == NULL) return;

  if (swapPartials) {
    copy = (SeqLocPtr) AsnIoMemCopy ((Pointer) location,
                                     (AsnReadFunc) SeqLocAsnRead,
                                     (AsnWriteFunc) SeqLocAsnWrite);
    if (copy != NULL) {

      /* read the flags, then write them back in the opposite positions */

      CheckSeqLocForPartial (copy, &was5, &was3);
      SetSeqLocPartial (copy, was3, was5);
      FF_DoFlatLoc (ajp, ffstring, bsp, copy, ok_to_complement, isGap);
      SeqLocFree (copy);
    }
  } else {
    FF_DoFlatLoc (ajp, ffstring, bsp, location, ok_to_complement, isGap);
  }
}
2839 
2840 
FFFlatLoc(IntAsn2gbJobPtr ajp,BioseqPtr bsp,SeqLocPtr location,Boolean masterStyle,Boolean isGap)2841 NLM_EXTERN CharPtr FFFlatLoc (
2842   IntAsn2gbJobPtr ajp,
2843   BioseqPtr bsp,
2844   SeqLocPtr location,
2845   Boolean masterStyle,
2846   Boolean isGap
2847 )
2848 
2849 {
2850   Boolean     hasNulls;
2851   IntFuzzPtr  fuzz = NULL;
2852   SeqLocPtr   loc;
2853   Boolean     minus_strand = FALSE;
2854   Boolean     noLeft;
2855   Boolean     noRight;
2856   Uint1       num = 1;
2857   ValNodePtr  partiallist = NULL, emptypartials = NULL;
2858   SeqPntPtr   spp;
2859   CharPtr     str;
2860   SeqLocPtr   tmp;
2861   StringItemPtr ffstring = NULL;
2862 
2863   if (ajp == NULL || bsp == NULL || location == NULL) return NULL;
2864 
2865   ffstring = FFGetString(ajp);
2866 
2867   if (! order_initialized) {
2868     id_order [SEQID_GENBANK] = num++;
2869     id_order [SEQID_EMBL] = num++;
2870     id_order [SEQID_DDBJ] = num++;
2871     id_order [SEQID_OTHER] = num++;
2872     id_order [SEQID_TPG] = num++;
2873     id_order [SEQID_TPE] = num++;
2874     id_order [SEQID_TPD] = num++;
2875     id_order [SEQID_GPIPE] = num++;
2876     id_order [SEQID_GIBBSQ] = num++;
2877     id_order [SEQID_GIBBMT] = num++;
2878     id_order [SEQID_PRF] = num++;
2879     id_order [SEQID_PDB] = num++;
2880     id_order [SEQID_PIR] = num++;
2881     id_order [SEQID_SWISSPROT] = num++;
2882     id_order [SEQID_PATENT] = num++;
2883     id_order [SEQID_GI] = num++;;
2884     id_order [SEQID_GENERAL] = num++;
2885     id_order [SEQID_LOCAL] = num++;
2886     id_order [SEQID_GIIM] = num++;
2887     order_initialized = TRUE;
2888   }
2889 
2890   if (ajp->ajp.slp != NULL) {
2891     minus_strand = (Boolean) (SeqLocStrand (ajp->ajp.slp) == Seq_strand_minus);
2892   }
2893 
2894   if (ajp->smallGenomeSet) {
2895     FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
2896   } else if (masterStyle) {
2897 
2898     /* map location from parts to segmented bioseq */
2899 
2900     if (location->choice == SEQLOC_PNT) {
2901       spp = (SeqPntPtr) location->data.ptrvalue;
2902       if (spp != NULL) {
2903         fuzz = spp->fuzz;
2904       }
2905     }
2906 
2907     partiallist = GetSeqLocPartialSet (location);
2908     CheckSeqLocForPartial (location, &noLeft, &noRight);
2909     hasNulls = LocationHasNullsBetween (location);
2910     loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, ajp->relaxedMapping);
2911     if (loc == NULL) {
2912       tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2913       loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, ajp->relaxedMapping);
2914       SeqLocFree (tmp);
2915     }
2916     if (loc == NULL) {
2917       ValNodeFree (partiallist);
2918       return StringSave ("?");
2919     }
2920     emptypartials = GetSeqLocPartialSet (loc);
2921     FreeAllFuzz (loc);
2922     SetSeqLocPartial (loc, noLeft, noRight);
2923     if (ValNodeLen (partiallist) == ValNodeLen (emptypartials)) {
2924       SetSeqLocPartialSet (loc, partiallist);
2925     }
2926     ValNodeFree (partiallist);
2927     ValNodeFree (emptypartials);
2928 
2929     if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2930       spp = (SeqPntPtr) loc->data.ptrvalue;
2931       if (spp != NULL && spp->fuzz == NULL) {
2932         spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2933                                   (AsnReadFunc) IntFuzzAsnRead,
2934                                   (AsnWriteFunc) IntFuzzAsnWrite);
2935       }
2936     }
2937 
2938     FF_DoFlatLocEx (ajp, ffstring, bsp, loc, TRUE, isGap, minus_strand);
2939 
2940     SeqLocFree (loc);
2941 
2942   } else {
2943     FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
2944   }
2945 
2946   str = FFToCharPtr(ffstring);
2947   FFRecycleString(ajp, ffstring);
2948   return str;
2949 }
2950 
2951 
2952 
2953 
/*
 * PromoteSeqId
 *
 * Callback (passed to VisitSeqIdsInSeqLoc) that overwrites the contents
 * of sip in place with a copy of the id passed as userdata, frees the
 * previous contents, and then calls SeqIdStripLocus on the result.  The
 * node sip itself and its next link are kept, so pointers to it stay
 * valid.
 *
 *   sip      - id node to overwrite; ignored if NULL
 *   userdata - SeqIdPtr of the replacement id
 *
 * sip is left untouched if the replacement cannot be duplicated or the
 * scratch node cannot be allocated.
 */
static void PromoteSeqId (SeqIdPtr sip, Pointer userdata)

{
  SeqIdPtr  bestid, newid, oldid;

  if (sip == NULL) return;

  bestid = (SeqIdPtr) userdata;

  newid = SeqIdDup (bestid);
  if (newid == NULL) return;

  oldid = ValNodeNew (NULL);
  if (oldid == NULL) {

    /* do not leak the duplicate when the scratch node cannot be allocated */

    SeqIdFree (newid);
    return;
  }

  /* move the old contents into a detached node so they can be freed */

  MemCopy (oldid, sip, sizeof (ValNode));
  oldid->next = NULL;

  /* sip takes over the payload of the duplicate */

  sip->choice = newid->choice;
  sip->data.ptrvalue = newid->data.ptrvalue;

  SeqIdFree (oldid);

  /* only the node shell of the duplicate is freed, its payload now belongs to sip */

  ValNodeFree (newid);

  SeqIdStripLocus (sip);
}
2978 
SeqLocReMapEx(SeqIdPtr newid,SeqLocPtr seq_loc,SeqLocPtr location,Int4 offset,Boolean rev,Boolean masterStyle,Boolean relaxed)2979 NLM_EXTERN SeqLocPtr SeqLocReMapEx (
2980   SeqIdPtr newid,
2981   SeqLocPtr seq_loc,
2982   SeqLocPtr location,
2983   Int4 offset,
2984   Boolean rev,
2985   Boolean masterStyle,
2986   Boolean relaxed
2987 )
2988 
2989 {
2990   BioseqPtr    bsp;
2991   Boolean      hasNulls;
2992   IntFuzzPtr   fuzz = NULL;
2993   SeqLocPtr    loc;
2994   Boolean      noLeft;
2995   Boolean      noRight;
2996   SeqEntryPtr  scope;
2997   SeqIdPtr     sip;
2998   SeqLocPtr    slp = NULL;
2999   SeqPntPtr    spp;
3000   SeqLocPtr    tmp;
3001 
3002   if (newid == NULL || seq_loc == NULL || location == NULL) return NULL;
3003 
3004   if (masterStyle) {
3005 
3006     sip = SeqLocId (seq_loc);
3007     if (sip == NULL) return NULL;
3008     bsp = BioseqFind (sip);
3009     if (bsp == NULL) {
3010       scope = SeqEntrySetScope (NULL);
3011       bsp = BioseqFind (sip);
3012       SeqEntrySetScope (scope);
3013     }
3014     if (bsp == NULL) return NULL;
3015     sip = SeqIdFindBest (bsp->id, 0);
3016 
3017     /* map location from parts to segmented bioseq */
3018 
3019     if (location->choice == SEQLOC_PNT) {
3020       spp = (SeqPntPtr) location->data.ptrvalue;
3021       if (spp != NULL) {
3022         fuzz = spp->fuzz;
3023       }
3024     }
3025 
3026     CheckSeqLocForPartial (location, &noLeft, &noRight);
3027     hasNulls = LocationHasNullsBetween (location);
3028     loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE, relaxed);
3029     if (loc == NULL) {
3030       tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
3031       loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE, relaxed);
3032       SeqLocFree (tmp);
3033     }
3034     if (loc == NULL) {
3035       return NULL;
3036     }
3037     FreeAllFuzz (loc);
3038     SetSeqLocPartial (loc, noLeft, noRight);
3039 
3040     if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
3041       spp = (SeqPntPtr) loc->data.ptrvalue;
3042       if (spp != NULL && spp->fuzz == NULL) {
3043         spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
3044                                   (AsnReadFunc) IntFuzzAsnRead,
3045                                   (AsnWriteFunc) IntFuzzAsnWrite);
3046       }
3047     }
3048 
3049     scope = SeqEntrySetScope (NULL);
3050     slp = SeqLocReMap (newid, seq_loc, loc, offset, rev);
3051     SeqEntrySetScope (scope);
3052 
3053     SeqLocFree (loc);
3054 
3055     VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId);
3056   } else {
3057 
3058     scope = SeqEntrySetScope (NULL);
3059     slp = SeqLocReMap (newid, seq_loc, location, offset, rev);
3060     SeqEntrySetScope (scope);
3061   }
3062 
3063   return slp;
3064 }
3065 
3066 
3067 /******************************************************************************/
3068 /*                            End FFFlatLoc functions.                          */
3069 /******************************************************************************/
3070 
3071 
3072 
/*
 * SubSourceToQualArray - record, for each source qualifier slot in qvp, the
 * first SubSource in the chain starting at ssp that maps to that slot.
 *
 * Subtype 255 is looked up as table position 44; subtypes of 45 and above
 * are ignored, as are table entries that are not in the open range
 * (0, ASN2GNBK_TOTAL_SOURCE).  Slots that are already filled are kept.
 */
static void SubSourceToQualArray (
  SubSourcePtr ssp,
  QualValPtr qvp
)

{
  Uint1         code;
  SubSourcePtr  curr;
  SourceType    slot;

  if (qvp == NULL) return;

  for (curr = ssp; curr != NULL; curr = curr->next) {
    code = curr->subtype;
    if (code == 255) {
      code = 44;
    }
    if (code >= 45) continue;

    slot = subSourceToSourceIdx [code];
    if (slot <= 0 || slot >= ASN2GNBK_TOTAL_SOURCE) continue;

    /* first occurrence wins */
    if (qvp [slot].ssp == NULL) {
      qvp [slot].ssp = curr;
    }
  }
}
3100 
/*
 * Lookup table from OrgMod subtype code to source qualifier index.
 * OrgModToQualArray indexes it directly with subtypes below 42, after
 * folding subtypes 253, 254 and 255 into positions 39, 40 and 41.
 */
NLM_EXTERN SourceType orgModToSourceIdx [42] = {
  /*  0 */
  SCQUAL_zero_orgmod,
  SCQUAL_one_orgmod,
  SCQUAL_strain,
  SCQUAL_sub_strain,
  SCQUAL_type,
  /*  5 */
  SCQUAL_sub_type,
  SCQUAL_variety,
  SCQUAL_serotype,
  SCQUAL_serogroup,
  SCQUAL_serovar,
  /* 10 */
  SCQUAL_cultivar,
  SCQUAL_pathovar,
  SCQUAL_chemovar,
  SCQUAL_biovar,
  SCQUAL_biotype,
  /* 15 */
  SCQUAL_group,
  SCQUAL_sub_group,
  SCQUAL_isolate,
  SCQUAL_common,
  SCQUAL_acronym,
  /* 20 */
  SCQUAL_dosage,
  SCQUAL_spec_or_nat_host,
  SCQUAL_sub_species,
  SCQUAL_specimen_voucher,
  SCQUAL_authority,
  /* 25 */
  SCQUAL_forma,
  SCQUAL_forma_specialis,
  SCQUAL_ecotype,
  SCQUAL_synonym,
  SCQUAL_anamorph,
  /* 30 */
  SCQUAL_teleomorph,
  SCQUAL_breed,
  SCQUAL_gb_acronym,
  SCQUAL_gb_anamorph,
  SCQUAL_gb_synonym,
  /* 35 */
  SCQUAL_culture_collection,
  SCQUAL_bio_material,
  SCQUAL_metagenome_source,
  SCQUAL_type_material,
  /* 39 - 41 : reached from subtypes 253, 254 and 255 */
  SCQUAL_old_lineage,
  SCQUAL_old_name,
  SCQUAL_orgmod_note
};
3145 
/*
 * OrgModToQualArray - record, for each source qualifier slot in qvp, the
 * first OrgMod in the chain starting at omp that maps to that slot.
 *
 * Subtypes 253, 254 and 255 are looked up as positions 39, 40 and 41 of
 * orgModToSourceIdx; any other subtype of 42 or above is ignored, as are
 * table entries that are not in the open range (0, ASN2GNBK_TOTAL_SOURCE).
 * Slots that are already filled are kept.
 */
static void OrgModToQualArray (
  OrgModPtr omp,
  QualValPtr qvp
)

{
  Uint1       code;
  OrgModPtr   curr;
  SourceType  slot;

  if (qvp == NULL) return;

  for (curr = omp; curr != NULL; curr = curr->next) {
    switch (curr->subtype) {
      case 253 :
        code = 39;
        break;
      case 254 :
        code = 40;
        break;
      case 255 :
        code = 41;
        break;
      default :
        code = curr->subtype;
        break;
    }
    if (code >= 42) continue;

    slot = orgModToSourceIdx [code];
    if (slot <= 0 || slot >= ASN2GNBK_TOTAL_SOURCE) continue;

    /* first occurrence wins */
    if (qvp [slot].omp == NULL) {
      qvp [slot].omp = curr;
    }
  }
}
3177 
/*
 * Ready-made qualifier text, 23 entries (positions 0 through 22).
 * NOTE(review): presumably indexed by the BioSource genome value, with NULL
 * meaning that no qualifier is printed for that value - the code that reads
 * this table is not in view here, confirm at the use site.
 */
static CharPtr organelleQual [] = {
  /*  0 */
  NULL,
  NULL,
  "/organelle=\"plastid:chloroplast\"",
  "/organelle=\"plastid:chromoplast\"",
  "/organelle=\"mitochondrion:kinetoplast\"",
  /*  5 */
  "/organelle=\"mitochondrion\"",
  "/organelle=\"plastid\"",
  "/macronuclear",
  NULL,
  "/plasmid=\"\"",
  /* 10 */
  "/transposon=\"\"",
  "/insertion_seq=\"\"",
  "/organelle=\"plastid:cyanelle\"",
  "/proviral",
  NULL,
  /* 15 */
  "/organelle=\"nucleomorph\"",
  "/organelle=\"plastid:apicoplast\"",
  "/organelle=\"plastid:leucoplast\"",
  "/organelle=\"plastid:proplastid\"",
  NULL,
  /* 20 */
  "/organelle=\"hydrogenosome\"",
  NULL,
  "/organelle=\"chromatophore\""
};
3203 
StringIsJustQuotes(CharPtr str)3204 NLM_EXTERN Boolean StringIsJustQuotes (
3205   CharPtr str
3206 )
3207 
3208 {
3209   Nlm_Uchar  ch;    /* to use 8bit characters in multibyte languages */
3210 
3211   if (str != NULL) {
3212     ch = *str;
3213     while (ch != '\0') {
3214       if (ch > ' ' && ch != '"' && ch != '\'') {
3215         return FALSE;
3216       }
3217       str++;
3218       ch = *str;
3219     }
3220   }
3221   return TRUE;
3222 }
3223 
/*
 * RemoveAllSpaces - delete every ' ' character from str, in place.
 *
 * Only the space character itself is removed; tabs, newlines and other
 * characters are kept.  Returns str, or NULL when str is NULL or empty.
 */
static CharPtr RemoveAllSpaces (
  CharPtr str
)

{
  CharPtr  from;
  CharPtr  to;

  if (str == NULL || *str == '\0') return NULL;

  /* the write position never runs ahead of the read position */
  to = str;
  for (from = str; *from != '\0'; from++) {
    if (*from == ' ') continue;
    *to = *from;
    to++;
  }
  *to = '\0';

  return str;
}
3250 
AddFeatureToGbseq(GBSeqPtr gbseq,GBFeaturePtr gbfeat,CharPtr str,SeqFeatPtr sfp)3251 NLM_EXTERN void AddFeatureToGbseq (
3252   GBSeqPtr gbseq,
3253   GBFeaturePtr gbfeat,
3254   CharPtr str,
3255   SeqFeatPtr sfp
3256 )
3257 
3258 {
3259   Char            ch;
3260   CharPtr         copy;
3261   GBQualifierPtr  gbqual;
3262   GBQualifierPtr  last = NULL;
3263   CharPtr         ptr;
3264   CharPtr         qual;
3265   CharPtr         tmp;
3266   CharPtr         val;
3267 
3268   if (gbseq == NULL || gbfeat == NULL || StringHasNoText (str)) return;
3269 
3270   copy = StringSave (str);
3271 
3272   /* link in reverse order, to be reversed in slash block */
3273 
3274   gbfeat->next = gbseq->feature_table;
3275   gbseq->feature_table = gbfeat;
3276 
3277   /* now parse qualifiers */
3278 
3279   ptr = StringStr (copy, "                     /");
3280   while (ptr != NULL) {
3281     qual = ptr + 22;
3282     val = qual;
3283     ch = *val;
3284     while (ch != '=' && ch != '\n' && ch != '\0') {
3285       val++;
3286       ch = *val;
3287     }
3288     /*
3289     val = StringChr (qual, '=');
3290     if (val == NULL) {
3291       val = StringChr (qual, '\n');
3292     }
3293     */
3294     if (ch != '\0' /* val != NULL */) {
3295       *val = '\0';
3296       val++;
3297       if (ch == '=') {
3298         tmp = val;
3299         if (*val == '"') {
3300           val++;
3301           tmp = val;
3302           ch = *tmp;
3303           while (ch != '"' && ch != '\0') {
3304             tmp++;
3305             ch = *tmp;
3306           }
3307         }
3308         ptr = StringStr (tmp, "\n                     /");
3309         if (ptr != NULL) {
3310           *ptr = '\0';
3311           ptr++;
3312         }
3313       } else {
3314         ptr = StringStr (val, "                     /");
3315         val = NULL;
3316       }
3317       gbqual = GBQualifierNew ();
3318       if (gbqual != NULL) {
3319         gbqual->name = StringSave (qual);
3320         if (! StringHasNoText (val)) {
3321           gbqual->value = StringSave (val);
3322           CleanQualValue (gbqual->value);
3323           Asn2gnbkCompressSpaces (gbqual->value);
3324           if (sfp != NULL) {
3325             if (sfp->data.choice == SEQFEAT_CDREGION &&
3326                 StringICmp (qual, "translation") == 0) {
3327               RemoveAllSpaces (gbqual->value);
3328             } else if (sfp->data.choice == SEQFEAT_CDREGION &&
3329                        StringICmp (qual, "coded_by") == 0) {
3330               RemoveAllSpaces (gbqual->value);
3331             } else if (sfp->data.choice == SEQFEAT_RNA &&
3332                        StringICmp (qual, "transcription") == 0) {
3333               RemoveAllSpaces (gbqual->value);
3334             } else if (sfp->data.choice == SEQFEAT_PROT &&
3335                        StringICmp (qual, "peptide") == 0) {
3336               RemoveAllSpaces (gbqual->value);
3337             } else if (sfp->data.choice == SEQFEAT_PROT &&
3338                        StringICmp (qual, "derived_from") == 0) {
3339               RemoveAllSpaces (gbqual->value);
3340             }
3341           }
3342         }
3343       }
3344     } else {
3345       gbqual = GBQualifierNew ();
3346       if (gbqual != NULL) {
3347         gbqual->name = StringSave (qual);
3348       }
3349     }
3350     if (gbfeat->quals == NULL) {
3351       gbfeat->quals = gbqual;
3352     } else if (last != NULL) {
3353       last->next = gbqual;
3354     }
3355     last = gbqual;
3356   }
3357 
3358   MemFree (copy);
3359 }
3360 
GetMolTypeQual(BioseqPtr bsp)3361 NLM_EXTERN CharPtr GetMolTypeQual (
3362   BioseqPtr bsp
3363 )
3364 
3365 {
3366   SeqMgrDescContext  dcontext;
3367   MolInfoPtr         mip;
3368   SeqDescrPtr        sdp;
3369 
3370   if (bsp == NULL) return NULL;
3371 
3372   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3373   if (sdp == NULL) return NULL;
3374   mip = (MolInfoPtr) sdp->data.ptrvalue;
3375   if (mip == NULL) return NULL;
3376 
3377   switch (mip->biomol) {
3378     case 0 :
3379       switch (bsp->mol) {
3380         case Seq_mol_dna :
3381           return "unassigned DNA";
3382         case Seq_mol_rna :
3383           return "unassigned RNA";
3384         case Seq_mol_na :
3385           break;
3386         default :
3387           break;
3388       }
3389       break;
3390     case MOLECULE_TYPE_GENOMIC :
3391       switch (bsp->mol) {
3392         case Seq_mol_dna :
3393           return "genomic DNA";
3394         case Seq_mol_rna :
3395           return "genomic RNA";
3396         case Seq_mol_na :
3397           break;
3398         default :
3399           break;
3400       }
3401       break;
3402     case MOLECULE_TYPE_PRE_MRNA :
3403       return "transcribed RNA";
3404     case MOLECULE_TYPE_MRNA :
3405       return "mRNA";
3406     case MOLECULE_TYPE_RRNA :
3407       return "rRNA";
3408     case MOLECULE_TYPE_TRNA :
3409       return "tRNA";
3410     case MOLECULE_TYPE_SNRNA :
3411       return "transcribed RNA";
3412     case MOLECULE_TYPE_SCRNA :
3413       return "transcribed RNA";
3414     case MOLECULE_TYPE_PEPTIDE :
3415       break;
3416     case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
3417       switch (bsp->mol) {
3418         case Seq_mol_dna :
3419           return "other DNA";
3420         case Seq_mol_rna :
3421           return "other RNA";
3422         case Seq_mol_na :
3423           break;
3424         default :
3425           break;
3426       }
3427       break;
3428     case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
3429       break;
3430     case MOLECULE_TYPE_CRNA :
3431       return "viral cRNA";
3432       break;
3433     case MOLECULE_TYPE_SNORNA :
3434       return "transcribed RNA";
3435       break;
3436     case MOLECULE_TYPE_TRANSCRIBED_RNA :
3437       return "transcribed RNA";
3438       break;
3439     case MOLECULE_TYPE_NCRNA :
3440       return "transcribed RNA";
3441       break;
3442     case MOLECULE_TYPE_TMRNA :
3443       return "transcribed RNA";
3444       break;
3445     case 255 :
3446       switch (bsp->mol) {
3447         case Seq_mol_dna :
3448           return "other DNA";
3449         case Seq_mol_rna :
3450           return "other RNA";
3451         case Seq_mol_na :
3452           break;
3453         default :
3454           break;
3455       }
3456       break;
3457     default :
3458       break;
3459   }
3460 
3461   return NULL;
3462 }
3463 
ParsePCRPrimerString(QualValPtr qvp)3464 static ValNodePtr ParsePCRPrimerString (
3465   QualValPtr qvp
3466 )
3467 
3468 {
3469   CharPtr       fwd_primer_seq = NULL;
3470   CharPtr       rev_primer_seq = NULL;
3471   CharPtr       fwd_primer_name = NULL;
3472   CharPtr       rev_primer_name = NULL;
3473   SubSourcePtr  ssp;
3474 
3475   if (qvp == NULL) return NULL;
3476 
3477   ssp = qvp [SCQUAL_fwd_primer_seq].ssp;
3478   if (ssp != NULL) {
3479     fwd_primer_seq = ssp->name;
3480   }
3481   ssp = qvp [SCQUAL_rev_primer_seq].ssp;
3482   if (ssp != NULL) {
3483     rev_primer_seq = ssp->name;
3484   }
3485   ssp = qvp [SCQUAL_fwd_primer_name].ssp;
3486   if (ssp != NULL) {
3487     fwd_primer_name = ssp->name;
3488   }
3489   ssp = qvp [SCQUAL_rev_primer_name].ssp;
3490   if (ssp != NULL) {
3491     rev_primer_name = ssp->name;
3492   }
3493 
3494   return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
3495 }
3496 
ParseColonString(CharPtr strs,Boolean multiple)3497 static ValNodePtr ParseColonString (
3498   CharPtr strs,
3499   Boolean multiple
3500 )
3501 
3502 {
3503   ValNodePtr  head = NULL;
3504   size_t      len;
3505   CharPtr     ptr, str, tmp;
3506 
3507   if (StringHasNoText (strs)) return NULL;
3508 
3509   tmp = StringSave (strs);
3510   str = tmp;
3511   len = StringLen (str);
3512   if (len > 1 && StringChr (str, ':') != NULL /* && multiple */) {
3513     while (StringDoesHaveText (str)) {
3514       ptr = StringChr (str, ':');
3515       if (ptr != NULL) {
3516         *ptr = '\0';
3517         ptr++;
3518       }
3519       TrimSpacesAroundString (str);
3520       ValNodeCopyStr (&head, 0, str);
3521       str = ptr;
3522     }
3523   } else {
3524     ValNodeCopyStr (&head, 0, str);
3525   }
3526 
3527   MemFree (tmp);
3528   return head;
3529 }
3530 
/*
 * PrintHalfPrimer - append the text for one direction of a primer set to
 * the string list at headp.
 *
 * name and seq are split with ParseColonString and paired by position.
 * For each sequence, the matching name (if it has text) is written as
 * nm_label + name, then the sequence (if it has text) as sq_label + seq.
 * When there are no sequences at all and name_only_ok is TRUE, the names
 * alone are written.  prefix is written before the first item; every
 * later item is preceded by ", ".
 */
static void PrintHalfPrimer (
  ValNodePtr PNTR headp,
  CharPtr name,
  CharPtr seq,
  CharPtr nm_label,
  CharPtr sq_label,
  CharPtr prefix,
  Boolean name_only_ok,
  Boolean multiple
)

{
  CharPtr     item;
  ValNodePtr  names;
  ValNodePtr  nm;
  ValNodePtr  seqs;
  ValNodePtr  sq;

  names = ParseColonString (name, multiple);
  seqs = ParseColonString (seq, multiple);

  if (seqs != NULL) {

    /* walk the sequences, consuming one name per sequence while they last */

    nm = names;
    for (sq = seqs; sq != NULL; sq = sq->next) {
      if (nm != NULL) {
        item = (CharPtr) nm->data.ptrvalue;
        if (StringDoesHaveText (item)) {
          ValNodeCopyStr (headp, 0, prefix);
          ValNodeCopyStr (headp, 0, nm_label);
          ValNodeCopyStr (headp, 0, item);
          prefix = ", ";
        }
        nm = nm->next;
      }
      item = (CharPtr) sq->data.ptrvalue;
      if (StringDoesHaveText (item)) {
        ValNodeCopyStr (headp, 0, prefix);
        ValNodeCopyStr (headp, 0, sq_label);
        ValNodeCopyStr (headp, 0, item);
        prefix = ", ";
      }
    }

  } else if (name_only_ok) {

    for (nm = names; nm != NULL; nm = nm->next) {
      item = (CharPtr) nm->data.ptrvalue;
      if (StringDoesHaveText (item)) {
        ValNodeCopyStr (headp, 0, prefix);
        ValNodeCopyStr (headp, 0, nm_label);
        ValNodeCopyStr (headp, 0, item);
        prefix = ", ";
      }
    }
  }

  ValNodeFreeData (names);
  ValNodeFreeData (seqs);
}
3588 
NextPCRPrimerString(PcrSetPtr psp,Boolean isInNote,Boolean multiple)3589 static CharPtr NextPCRPrimerString (
3590   PcrSetPtr psp,
3591   Boolean isInNote,
3592   Boolean multiple
3593 )
3594 
3595 {
3596   ValNodePtr  head = NULL, vnp;
3597   CharPtr     prefix = NULL;
3598   CharPtr     str;
3599 
3600   if (psp == NULL) return NULL;
3601 
3602   if (StringHasNoText (psp->fwd_seq) || StringHasNoText (psp->rev_seq)) {
3603     if (isInNote) {
3604       /*
3605       if (StringDoesHaveText (psp->fwd_name)) {
3606         ValNodeCopyStr (&head, 0, prefix);
3607         ValNodeCopyStr (&head, 0, "fwd_name: ");
3608         ValNodeCopyStr (&head, 0, psp->fwd_name);
3609         prefix = ", ";
3610       }
3611 
3612       if (StringDoesHaveText (psp->fwd_seq)) {
3613         ValNodeCopyStr (&head, 0, prefix);
3614         ValNodeCopyStr (&head, 0, "fwd_seq: ");
3615         ValNodeCopyStr (&head, 0, psp->fwd_seq);
3616         prefix = ", ";
3617       }
3618 
3619       if (StringDoesHaveText (psp->rev_name)) {
3620         ValNodeCopyStr (&head, 0, prefix);
3621         ValNodeCopyStr (&head, 0, "rev_name: ");
3622         ValNodeCopyStr (&head, 0, psp->rev_name);
3623         prefix = ", ";
3624       }
3625 
3626       if (StringDoesHaveText (psp->rev_seq)) {
3627         ValNodeCopyStr (&head, 0, prefix);
3628         ValNodeCopyStr (&head, 0, "rev_seq: ");
3629         ValNodeCopyStr (&head, 0, psp->rev_seq);
3630         prefix = ", ";
3631       }
3632       */
3633       PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3634       if (head != NULL) {
3635         prefix = ", ";
3636       }
3637       PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3638     } else {
3639       return StringSave ("");
3640     }
3641   } else {
3642     if (isInNote) return StringSave ("");
3643 
3644     PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3645     PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3646   }
3647 
3648   if (head != NULL && isInNote) {
3649     vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3650     if (vnp != NULL) {
3651       vnp->next = head;
3652       head = vnp;
3653     }
3654   }
3655 
3656   str = MergeFFValNodeStrs (head);
3657   ValNodeFreeData (head);
3658   return str;
3659 }
3660 
/*
 * PrintHalfReaction - append the text for one direction of a PCR reaction
 * to the string list at headp.
 *
 * For every primer in the chain that has a sequence, its name (if it has
 * text) is written as nm_label + name, followed by sq_label + seq.  A
 * primer without a sequence contributes its name only when name_only_ok is
 * TRUE.  prefix is written before the first item; every later item is
 * preceded by ", ".  The multiple argument is not consulted.
 */
static void PrintHalfReaction (
  ValNodePtr PNTR headp,
  PCRPrimerPtr primers,
  CharPtr nm_label,
  CharPtr sq_label,
  CharPtr prefix,
  Boolean name_only_ok,
  Boolean multiple
)

{
  Boolean       has_seq;
  PCRPrimerPtr  primer;

  for (primer = primers; primer != NULL; primer = primer->next) {
    has_seq = StringDoesHaveText (primer->seq);
    if (! has_seq && ! name_only_ok) continue;

    if (StringDoesHaveText (primer->name)) {
      ValNodeCopyStr (headp, 0, prefix);
      ValNodeCopyStr (headp, 0, nm_label);
      ValNodeCopyStr (headp, 0, primer->name);
      prefix = ", ";
    }

    if (has_seq) {
      ValNodeCopyStr (headp, 0, prefix);
      ValNodeCopyStr (headp, 0, sq_label);
      ValNodeCopyStr (headp, 0, primer->seq);
      prefix = ", ";
    }
  }
}
3696 
NextPCRReaction(PCRReactionPtr prp,Boolean isInNote,Boolean multiple)3697 static CharPtr NextPCRReaction (
3698   PCRReactionPtr prp,
3699   Boolean isInNote,
3700   Boolean multiple
3701 )
3702 
3703 {
3704   Boolean       has_fwd_seq = FALSE, has_rev_seq = FALSE;
3705   ValNodePtr    head = NULL, vnp;
3706   PCRPrimerPtr  ppp;
3707   CharPtr       prefix = NULL, str;
3708 
3709   if (prp == NULL) return NULL;
3710 
3711   for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
3712     if (StringDoesHaveText (ppp->seq)) {
3713       has_fwd_seq = TRUE;
3714     }
3715   }
3716 
3717   for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
3718     if (StringDoesHaveText (ppp->seq)) {
3719       has_rev_seq = TRUE;
3720     }
3721   }
3722 
3723   if (has_fwd_seq && has_rev_seq) {
3724     if (isInNote) {
3725       return StringSave ("");
3726     } else {
3727       PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3728       PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3729     }
3730   } else {
3731     if (isInNote) {
3732       PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3733       if (head != NULL) {
3734         prefix = ", ";
3735       }
3736       PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3737     } else {
3738       return StringSave ("");
3739     }
3740   }
3741 
3742   if (head != NULL && isInNote) {
3743     vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3744     if (vnp != NULL) {
3745       vnp->next = head;
3746       head = vnp;
3747     }
3748   }
3749 
3750   str = MergeFFValNodeStrs (head);
3751   ValNodeFreeData (head);
3752   return str;
3753 }
3754 
3755 /* specimen_voucher, culture_collection, bio_material default institution mouseover */
3756 
/* one row of the institution code table: a code and the name stored for it */
typedef struct instcodedata {
  CharPtr  code;   /* text before the first tab of a line in the data file */
  CharPtr  name;   /* text after the second tab of that line */
} IcCodeData, PNTR IcCodePtr;

/* table state, filled in once by SetupInstCodeNameTable */
static ValNodePtr      ic_code_list = NULL;     /* all rows, sorted by code and then name */
static IcCodePtr PNTR  ic_code_data = NULL;     /* the same rows as an array, for binary search */
static Int4            ic_code_len = 0;         /* number of rows in ic_code_list */
static Boolean         ic_code_loaded = FALSE;  /* TRUE once a load has been attempted */
3766 
SortVnpByInstCode(VoidPtr ptr1,VoidPtr ptr2)3767 static int LIBCALLBACK SortVnpByInstCode (VoidPtr ptr1, VoidPtr ptr2)
3768 
3769 {
3770   int         compare;
3771   IcCodePtr   irp1, irp2;
3772   CharPtr     str1, str2;
3773   ValNodePtr  vnp1, vnp2;
3774 
3775   if (ptr1 == NULL || ptr2 == NULL) return 0;
3776   vnp1 = *((ValNodePtr PNTR) ptr1);
3777   vnp2 = *((ValNodePtr PNTR) ptr2);
3778   if (vnp1 == NULL || vnp2 == NULL) return 0;
3779   irp1 = (IcCodePtr) vnp1->data.ptrvalue;
3780   irp2 = (IcCodePtr) vnp2->data.ptrvalue;
3781   if (irp1 == NULL || irp2 == NULL) return 0;
3782   str1 = irp1->code;
3783   str2 = irp2->code;
3784   if (str1 == NULL || str2 == NULL) return 0;
3785   compare = StringCmp (str1, str2);
3786   if (compare > 0) {
3787     return 1;
3788   } else if (compare < 0) {
3789     return -1;
3790   }
3791   str1 = irp1->name;
3792   str2 = irp2->name;
3793   if (str1 == NULL || str2 == NULL) return 0;
3794   compare = StringCmp (str1, str2);
3795   if (compare > 0) {
3796     return 1;
3797   } else if (compare < 0) {
3798     return -1;
3799   }
3800   return 0;
3801 }
3802 
SetupInstCodeNameTable(void)3803 static void SetupInstCodeNameTable (void)
3804 
3805 {
3806   FileCache   fc;
3807   CharPtr     file = "institution_codes.txt";
3808   FILE        *fp = NULL;
3809   Int4        i;
3810   IcCodePtr   irp;
3811   ValNodePtr  last = NULL;
3812   Char        line [512];
3813   Char        path [PATH_MAX];
3814   CharPtr     ptr;
3815   ErrSev      sev;
3816   CharPtr     str;
3817   ValNodePtr  vnp;
3818 
3819   if (ic_code_loaded) return;
3820   if (ic_code_data != NULL) return;
3821 
3822   if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
3823     FileBuildPath (path, NULL, file);
3824     sev = ErrSetMessageLevel (SEV_ERROR);
3825     fp = FileOpen (path, "r");
3826     ErrSetMessageLevel (sev);
3827     if (fp != NULL) {
3828       FileCacheSetup (&fc, fp);
3829 
3830       str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
3831       while (str != NULL) {
3832         if (StringDoesHaveText (str)) {
3833           ptr = StringChr (str, '\t');
3834           if (ptr != NULL) {
3835             *ptr = '\0';
3836             ptr++;
3837             ptr = StringChr (ptr, '\t');
3838             if (ptr != NULL) {
3839               *ptr = '\0';
3840               ptr++;
3841               irp = (IcCodePtr) MemNew (sizeof (IcCodeData));
3842               if (irp != NULL) {
3843                 TrimSpacesAroundString (str);
3844                 TrimSpacesAroundString (ptr);
3845                 irp->code = StringSave (str);
3846                 irp->name = StringSave (ptr);
3847                 vnp = ValNodeAddPointer (&last, 0, (Pointer) irp);
3848                 if (ic_code_list == NULL) {
3849                   ic_code_list = vnp;
3850                 }
3851                 last = vnp;
3852               }
3853             }
3854           }
3855         }
3856         str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
3857       }
3858 
3859       FileClose (fp);
3860       ic_code_len = ValNodeLen (ic_code_list);
3861       if (ic_code_len > 0) {
3862         ic_code_list = ValNodeSort (ic_code_list, SortVnpByInstCode);
3863         ic_code_data = (IcCodePtr PNTR) MemNew (sizeof (IcCodePtr) * (ic_code_len + 1));
3864         if (ic_code_data != NULL) {
3865           for (vnp = ic_code_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
3866             irp = (IcCodePtr) vnp->data.ptrvalue;
3867             ic_code_data [i] = irp;
3868           }
3869         }
3870       }
3871     }
3872   }
3873 
3874   ic_code_loaded = TRUE;
3875 }
3876 
/*
 * FullNameFromInstCode - look up the name stored for an institution code.
 *
 * Loads the table on first use, then binary-searches the sorted array for
 * the first row whose code is not less than the one requested and accepts
 * it only on an exact StringCmp match.
 *
 * Returns the name held by the table (not a copy, do not free), or NULL
 * when code has no text, the table is unavailable, or there is no match.
 */
static CharPtr FullNameFromInstCode (CharPtr code)

{
  IcCodePtr  entry;
  Int4       hi, lo, mid;

  if (StringHasNoText (code)) return NULL;

  if (ic_code_data == NULL) {
    SetupInstCodeNameTable ();
    if (ic_code_data == NULL) return NULL;
  }

  lo = 0;
  hi = ic_code_len - 1;
  while (lo < hi) {
    mid = (lo + hi) / 2;
    entry = ic_code_data [(int) mid];
    if (entry == NULL || StringCmp (entry->code, code) >= 0) {
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }

  entry = ic_code_data [(int) hi];
  if (entry == NULL || StringCmp (entry->code, code) != 0) return NULL;

  return entry->name;
}
3909 
3910 /* specimen_voucher, culture_collection, bio_material hyperlinks */
3911 
/* base URLs, used in the links column of Nlm_spec_vouchers */
/* note that s_nrrl_base and s_nrrl_prok expand to the same URL */

#define s_acbr_base  "http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr="
#define s_atcc_base  "http://www.atcc.org/Products/All/"
#define s_bcrc_base  "https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no="
#define s_cas_base   "http://collections.calacademy.org/herp/specimen/"
#define s_cbs_base   "http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+"
#define s_ccap_base  "http://www.ccap.ac.uk/strain_info.php?Strain_No="
#define s_ccmp_base  "https://ccmp.bigelow.org/node/1/strain/CCMP"
#define s_ccug_base  "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
#define s_cfmr_base  "http://www.fpl.fs.fed.us/search/mycologysearch_action.php?sorting_rule=1u&phrasesAndKeywords02="
#define s_cori_base  "http://ccr.coriell.org/Sections/Search/Search.aspx?q="
#define s_dsm_base   "https://www.dsmz.de/catalogues/details/culture/DSM-"
#define s_dsmz_base  "https://www.dsmz.de/catalogues/details/culture/PV-"
#define s_frr_base   "http://www.foodscience.csiro.au/cgi-bin/rilax/search.pl?stpos=0&stype=AND&query="
#define s_fsu_base   "http://www.prz.uni-jena.de/data.php?fsu="
#define s_jcm_base   "http://www.jcm.riken.jp/cgi-bin/jcm/jcm_number?JCM="
#define s_kctc_base  "http://kctc.kribb.re.kr/English/_SearchView.aspx?sn="
#define s_ku_base    "https://ichthyology.specify.ku.edu/specify/bycatalog/"
#define s_lcr_base   "http://scd.landcareresearch.co.nz/Specimen/"
#define s_maff_base  "http://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff="
#define s_mcz_base   "http://mczbase.mcz.harvard.edu/guid/"
#define s_mtcc_base  "http://mtcc.imtech.res.in/catalogue_hyper.php?a="
#define s_mucl_base  "http://bccm.belspo.be/db/mucl_search_results.php?FIRSTITEM=1&LIST1=STRAIN_NUMBER&TEXT1="
#define s_nbrc_base  "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="
#define s_ncimb_base "http://www.ncimb.com/BioloMICS.aspx?Table=NCIMBstrains&ExactMatch=T&Fields=All&Name=NCIMB%20"
#define s_nctc_base  "https://www.phe-culturecollections.org.uk/products/bacteria/detail.jsp?collection=nctc&refId=NCTC+"
#define s_nrrl_base  "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
#define s_nrrl_mold  "http://nrrl.ncaur.usda.gov/cgi-bin/usda/mold/report.html?nrrlcodes="
#define s_nrrl_prok  "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
#define s_nrrl_yest  "http://nrrl.ncaur.usda.gov/cgi-bin/usda/yeast/report.html?nrrlcodes="
#define s_pcc_base   "http://www.crbip.pasteur.fr/fiches/fichecata.jsp?crbip=PCC+"
#define s_pcmb_base  "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
#define s_pycc_base  "http://pycc.bio-aware.com/BioloMICS.aspx?Table=PYCC%20strains&Name=PYCC%20"
#define s_sag_base   "http://sagdb.uni-goettingen.de/detailedList.php?str_number="
#define s_tgrc_base  "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base   "http://arctos.database.museum/guid/"
#define s_uamh_base  "https://secure.devonian.ualberta.ca/uamh/details.php?id="
#define s_usnm_base  "http://collections.mnh.si.edu/services/resolver/resolver.php?"
#define s_ypm_base   "http://collections.peabody.yale.edu/search/Record/"

/* separators, used in the prefix column of Nlm_spec_vouchers */

#define s_colon_pfx  ":"
#define s_uscr_pfx   "_"

/* collection-specific prefixes */
/* NOTE(review): the rows that use these and the s_ypm*_pfx values below are not in view here - confirm where they are placed in the link */

#define s_kui_pfx    "KUI/"
#define s_kuit_pfx   "KUIT/"
#define s_psu_pfx    "PSU:Mamm:"
#define s_usnm_pfx   "voucher=Birds:"

#define s_ypment_pfx "YPM-ENT-"
#define s_ypmher_pfx "YPM-HER-"
#define s_ypmich_pfx "YPM-ICH-"
#define s_ypmiz_pfx  "YPM-IZ-"
#define s_ypmmam_pfx "YPM-MAM-"
#define s_ypmorn_pfx "YPM-ORN-"

/* trailing text, used in the suffix column of Nlm_spec_vouchers */

#define s_acbr_sfx  "&Fields=All&ExactMatch=T"
#define s_atcc_sfx  ".aspx"
#define s_bcrc_sfx  "&type_id=9&keyword="
#define s_ku_sfx    "/"
#define s_mucl_sfx  "&LIST2=ALL+FIELDS&CONJ=OR&RANGE=20&B3=Run+Query"
#define s_pycc_sfx  "&Fields=All&ExactMatch=T"
3972 
/*
 * One row of the voucher hyperlink table (Nlm_spec_vouchers).
 * NOTE(review): the code that builds a link from a row is not in view
 * here; the field notes marked "presumably" are read off the table rows
 * and the field names and should be confirmed against that code.
 */
typedef struct vouch {
  CharPtr  sites;               /* collection code, e.g. "ATCC" or "DGR:Bird" */
  CharPtr  links;               /* base URL, one of the s_*_base strings */
  Boolean  prepend_institute;   /* presumably: put the institute code in front of the id */
  Int2     pad_to;              /* presumably: minimum width of the id, 0 for no padding */
  CharPtr  pad_with;            /* presumably: padding text used to reach pad_to */
  CharPtr  prefix;              /* optional text such as s_colon_pfx, or NULL */
  CharPtr  suffix;              /* optional trailing text such as s_atcc_sfx, or NULL */
} VouchData, PNTR VouchDataPtr;
3982 
/* table of collections for which a specimen voucher hyperlink can be built.
   VoucherNameIsValid binary-searches this table on the first column with
   StringICmp, so rows must stay sorted in case-insensitive order.  The last
   row with NULL sites is a terminator and is excluded from the search range.
   columns: sites, links, prepend_institute, pad_to, pad_with, prefix, suffix */
3983 static VouchData Nlm_spec_vouchers [] = {
3984   {  "ACBR",              s_acbr_base,  FALSE,  0, "",   NULL,          s_acbr_sfx  },
3985   {  "ATCC",              s_atcc_base,  FALSE,  0, "",   NULL,          s_atcc_sfx  },
3986   {  "BCRC",              s_bcrc_base,  FALSE,  0, "",   NULL,          s_bcrc_sfx  },
3987   {  "CAS:HERP",          s_cas_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
3988   {  "CBS",               s_cbs_base,   FALSE,  0, "",   NULL,          NULL        },
3989   {  "CCAP",              s_ccap_base,  FALSE,  0, "",   NULL,          NULL        },
3990   {  "CCMP",              s_ccmp_base,  FALSE,  0, "",   NULL,          NULL        },
3991   {  "CCUG",              s_ccug_base,  FALSE,  0, "",   NULL,          NULL        },
3992   {  "CFMR",              s_cfmr_base,  FALSE,  0, "",   NULL,          NULL        },
3993   {  "CHR",               s_lcr_base,   TRUE,   0, "",   s_uscr_pfx,    NULL        },
3994   {  "Coriell",           s_cori_base,  FALSE,  0, "",   NULL,          NULL        },
3995   {  "CRCM:Bird",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
3996   {  "DGR:Bird",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
3997   {  "DGR:Ento",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
3998   {  "DGR:Fish",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
3999   {  "DGR:Herp",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4000   {  "DGR:Mamm",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4001   {  "DMNS:Bird",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4002   {  "DMNS:Mamm",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4003   {  "DSM",               s_dsm_base,   FALSE,  0, "",   NULL,          NULL        },
4004   {  "DSMZ",              s_dsmz_base,  FALSE,  0, "",   NULL,          NULL        },
4005   {  "FRR",               s_frr_base,   FALSE,  0, "",   NULL,          NULL        },
4006   {  "FSU<DEU>",          s_fsu_base,   FALSE,  0, "",   NULL,          NULL        },
4007   {  "ICMP",              s_lcr_base,   TRUE,   0, "",   s_uscr_pfx,    NULL        },
4008   {  "JCM",               s_jcm_base,   FALSE,  0, "",   NULL,          NULL        },
4009   {  "KCTC",              s_kctc_base,  FALSE,  0, "",   NULL,          NULL        },
4010   {  "KNWR:Ento",         s_uam_base ,  TRUE,   0, "",   s_colon_pfx,   NULL        },
4011   {  "KU:I",              s_ku_base,    FALSE,  0, "",   s_kui_pfx,     s_ku_sfx    },
4012   {  "KU:IT",             s_ku_base,    FALSE,  0, "",   s_kuit_pfx,    s_ku_sfx    },
4013   {  "KWP:Ento",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4014   {  "MAFF",              s_maff_base,  FALSE,  0, "",   NULL,          NULL        },
4015   {  "MCZ:Bird",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4016   {  "MCZ:Cryo",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4017   {  "MCZ:Ent",           s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4018   {  "MCZ:Fish",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4019   {  "MCZ:Herp",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4020   {  "MCZ:Ich",           s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4021   {  "MCZ:IP",            s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4022   {  "MCZ:IZ",            s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4023   {  "MCZ:Mala",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4024   {  "MCZ:Mamm",          s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4025   {  "MCZ:Orn",           s_mcz_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4026   {  "MLZ:Bird",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4027   {  "MLZ:Mamm",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4028   {  "MSB:Bird",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4029   {  "MSB:Mamm",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4030   {  "MSB:Para",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4031   {  "MTCC",              s_mtcc_base,  FALSE,  0, "",   NULL,          NULL        },
4032   {  "MUCL",              s_mucl_base,  FALSE,  0, "",   NULL,          s_mucl_sfx  },
4033   {  "MVZ:Bird",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4034   {  "MVZ:Egg",           s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4035   {  "MVZ:Herp",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4036   {  "MVZ:Hild",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4037   {  "MVZ:Img",           s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4038   {  "MVZ:Mamm",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4039   {  "MVZ:Page",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4040   {  "MVZObs:Herp",       s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4041   {  "NBRC",              s_nbrc_base,  FALSE,  8, "0",  NULL,          NULL        },
4042   {  "NBSB:Bird",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4043   {  "NCIMB",             s_ncimb_base, FALSE,  0, "",   NULL,          NULL        },
4044   {  "NCTC",              s_nctc_base,  FALSE,  0, "",   NULL,          NULL        },
4045   {  "NRRL",              s_nrrl_base,  FALSE,  0, "",   NULL,          NULL        },
4046   {  "NRRL:MOLD",         s_nrrl_mold,  FALSE,  0, "",   NULL,          NULL        },
4047   {  "NRRL:PROK",         s_nrrl_prok,  FALSE,  0, "",   NULL,          NULL        },
4048   {  "NRRL:YEAST",        s_nrrl_yest,  FALSE,  0, "",   NULL,          NULL        },
4049   {  "NZAC",              s_lcr_base,   TRUE,   0, "",   s_uscr_pfx,    NULL        },
4050   {  "PCC",               s_pcc_base,   FALSE,  0, "",   NULL,          NULL        },
4051   {  "PCMB",              s_pcmb_base,  FALSE,  0, "",   NULL,          NULL        },
4052   {  "PDD",               s_lcr_base,   TRUE ,  0, "",   s_uscr_pfx,    NULL        },
4053   {  "PSU<USA-OR>:Mamm",  s_uam_base,   FALSE,  0, "",   s_psu_pfx,     NULL        },
4054   {  "PYCC",              s_pycc_base,  FALSE,  0, "",   NULL,          s_pycc_sfx  },
4055   {  "SAG",               s_sag_base,   FALSE,  0, "",   NULL,          NULL        },
4056   {  "TGRC",              s_tgrc_base,  FALSE,  0, "",   NULL,          NULL        },
4057   {  "UAM:Bird",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4058   {  "UAM:Bryo",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4059   {  "UAM:Crus",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4060   {  "UAM:Ento",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4061   {  "UAM:Fish",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4062   {  "UAM:Herb",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4063   {  "UAM:Herp",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4064   {  "UAM:Mamm",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4065   {  "UAM:Moll",          s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4066   {  "UAM:Paleo",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4067   {  "UAMH",              s_uamh_base,  FALSE,  0, "",   NULL,          NULL        },
4068   {  "UAMObs:Mamm",       s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4069   {  "USNM:Birds",        s_usnm_base,  FALSE,  0, "",   s_usnm_pfx,    NULL        },
4070   {  "WNMU:Bird",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4071   {  "WNMU:Fish",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4072   {  "WNMU:Mamm",         s_uam_base,   TRUE,   0, "",   s_colon_pfx,   NULL        },
4073   {  "YPM:ENT",           s_ypm_base,   FALSE,  6, "0",  s_ypment_pfx,  NULL        },
4074   {  "YPM:HER",           s_ypm_base,   FALSE,  6, "0",  s_ypmher_pfx,  NULL        },
4075   {  "YPM:ICH",           s_ypm_base,   FALSE,  6, "0",  s_ypmich_pfx,  NULL        },
4076   {  "YPM:IZ",            s_ypm_base,   FALSE,  6, "0",  s_ypmiz_pfx,   NULL        },
4077   {  "YPM:MAM",           s_ypm_base,   FALSE,  6, "0",  s_ypmmam_pfx,  NULL        },
4078   {  "YPM:ORN",           s_ypm_base,   FALSE,  6, "0",  s_ypmorn_pfx,  NULL        },
  /* terminator row, never matched by the lookup */
4079   {  NULL,                NULL,         FALSE,  0, "",   NULL,          NULL        }
4080 };
4081 
VoucherNameIsValid(CharPtr name)4082 static Int2 VoucherNameIsValid (
4083   CharPtr name
4084 )
4085 
4086 {
4087   Int2     L, R, mid;
4088   CharPtr  ptr;
4089   Char     str [256];
4090 
4091   if (StringHasNoText (name)) return -1;
4092   StringNCpy_0 (str, name, sizeof (str));
4093   ptr = StringChr (str, ' ');
4094   if (ptr != NULL) {
4095     *ptr = '\0';
4096   }
4097 
4098   L = 0;
4099   R = sizeof (Nlm_spec_vouchers) / sizeof (Nlm_spec_vouchers [0]) - 1; /* -1 because now NULL terminated */
4100 
4101   while (L < R) {
4102     mid = (L + R) / 2;
4103     if (StringICmp (Nlm_spec_vouchers [mid].sites, str) < 0) {
4104       L = mid + 1;
4105     } else {
4106       R = mid;
4107     }
4108   }
4109 
4110   /* switch to case sensitive comparison to restore case strictness */
4111 
4112   if (StringICmp (Nlm_spec_vouchers [R].sites, str) == 0) {
4113     return R;
4114   }
4115 
4116   return -1;
4117 }
4118 
4119 /* works on subname copy that it can change */
4120 
ParseSecVoucher(CharPtr subname,CharPtr PNTR inst,CharPtr PNTR id)4121 static Boolean ParseSecVoucher (
4122   CharPtr subname,
4123   CharPtr PNTR inst,
4124   CharPtr PNTR id
4125 )
4126 
4127 {
4128   CharPtr  ptr;
4129   CharPtr  tmp;
4130 
4131   if (StringHasNoText (subname)) return FALSE;
4132   if (StringLen (subname) < 5) return FALSE;
4133   TrimSpacesAroundString (subname);
4134 
4135   ptr = StringChr (subname, ':');
4136   if (ptr == NULL) return FALSE;
4137 
4138   *inst = subname;
4139 
4140   tmp = StringChr (ptr + 1, ':');
4141   if (tmp != NULL) {
4142     *tmp = '\0';
4143     tmp++;
4144     TrimSpacesAroundString (tmp);
4145     *id = tmp;
4146   } else {
4147     *ptr = '\0';
4148     ptr++;
4149     TrimSpacesAroundString (ptr);
4150     *id = ptr;
4151   }
4152 
4153   if (StringHasNoText (*inst) || StringHasNoText (*id)) return FALSE;
4154 
4155   return TRUE;
4156 }
4157 
/*
 * Do_www_specimen_voucher - append "inst:<a href=...>id</a>" to ffstring.
 *
 *   ffstring  output string being built
 *   inst      institute code, written verbatim into the HTML (the caller in
 *             this file passes it through EncodeXml first)
 *   id        specimen id, used both inside the URL and as the link text
 *   vdp       table row that describes how the URL is assembled
 *
 * The href is vdp->links, then inst when prepend_institute is set, then the
 * optional prefix, then padding, then id, then the optional suffix.  When
 * FullNameFromInstCode knows inst, the code is wrapped in an <acronym> tag
 * carrying that name as its title.  Nothing is written when any argument or
 * vdp->links is NULL.
 */
static void Do_www_specimen_voucher (
  StringItemPtr ffstring,
  CharPtr inst,
  CharPtr id,
  VouchDataPtr vdp
)

{
  size_t   len_id, len_pad, pad_to;
  CharPtr  mouseover = NULL;

  if ( ffstring == NULL || inst == NULL || id == NULL || vdp == NULL || vdp->links == NULL ) return;

  mouseover = FullNameFromInstCode (inst);
  if (mouseover != NULL) {
    FFAddOneString (ffstring, "<acronym title=\"", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, mouseover, FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString(ffstring, "\" class=\"voucher\">", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, "</acronym>", FALSE, FALSE, TILDE_IGNORE);
  } else {
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, vdp->links, FALSE, FALSE, TILDE_IGNORE);
  if (vdp->prepend_institute) {
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
  }
  if (vdp->prefix != NULL) {
    FFAddOneString (ffstring, vdp->prefix, FALSE, FALSE, TILDE_IGNORE);
  }
  if (vdp->pad_to > 0) {
    len_id = StringLen (id);
    len_pad = StringLen (vdp->pad_with);
    pad_to = (size_t) vdp->pad_to;
    /* a NULL or empty pad string would never advance len_id, so skip
       padding altogether instead of looping forever */
    if (len_pad > 0) {
      while (len_id < pad_to) {
        FFAddOneString (ffstring, vdp->pad_with, FALSE, FALSE, TILDE_IGNORE);
        len_id += len_pad;
      }
    }
  }
  FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
  if (vdp->suffix != NULL) {
    FFAddOneString (ffstring, vdp->suffix, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
4206 
FF_www_specimen_voucher(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr subname)4207 NLM_EXTERN void FF_www_specimen_voucher (
4208   IntAsn2gbJobPtr ajp,
4209   StringItemPtr ffstring,
4210   CharPtr subname
4211 )
4212 
4213 {
4214   Char          buf [512];
4215   CharPtr       inst = NULL, id = NULL, mouseover = NULL, encoded;
4216   Int2          R;
4217   VouchDataPtr  vdp;
4218 
4219   if ( ffstring == NULL || subname == NULL ) return;
4220   if (! GetWWW (ajp)) { /* not in www mode */
4221     FFAddTextToString(ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4222     return;
4223   }
4224   StringNCpy_0 (buf, subname, sizeof (buf));
4225   if (! ParseSecVoucher (buf, &inst, &id)) {
4226     FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4227     return;
4228   }
4229   R = VoucherNameIsValid (inst);
4230   if (R < 0) {
4231     mouseover = FullNameFromInstCode (inst);
4232     if (mouseover != NULL) {
4233       FFAddOneString (ffstring, "<acronym title=\"", FALSE, FALSE, TILDE_IGNORE);
4234       FFAddOneString (ffstring, mouseover, FALSE, FALSE, TILDE_IGNORE);
4235       FFAddOneString(ffstring, "\" class=\"voucher\">", FALSE, FALSE, TILDE_IGNORE);
4236       encoded = EncodeXml (inst);
4237       FFAddOneString (ffstring, encoded, FALSE, FALSE, TILDE_IGNORE);
4238       MemFree (encoded);
4239       FFAddOneString (ffstring, "</acronym>", FALSE, FALSE, TILDE_IGNORE);
4240       FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
4241       FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
4242     } else {
4243       FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4244     }
4245     return;
4246   }
4247   vdp = &(Nlm_spec_vouchers [R]);
4248   if (vdp == NULL || vdp->links == NULL) {
4249     FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4250     return;
4251   }
4252   encoded = EncodeXml (inst);
4253   Do_www_specimen_voucher (ffstring, encoded, id, vdp);
4254   MemFree (encoded);
4255 }
4256 
/*
 * Do_www_lat_lon - append lat_lon to ffstring as a hyperlink to a map.
 *
 *   ffstring  output string being built
 *   lat_lon   text of the form "<lat> <N|S> <lon> <E|W>"; the only caller in
 *             this file validates the format before calling
 *
 * A copy of lat_lon (at most 127 characters) is split on blanks into up to
 * six tokens.  Token 0 and token 2 become the two coordinates; a token 1
 * starting with 'S' or a token 3 starting with 'W' puts a minus sign in front
 * of the corresponding coordinate.  A missing coordinate is written as "?".
 * The link text is the unmodified lat_lon.
 */
static void Do_www_lat_lon (
  StringItemPtr ffstring,
  CharPtr lat_lon
)

{
  Char     buf [128];
  Char     ch;
  CharPtr  ew = "";
  Int2     i;
  CharPtr  ns = "";
  CharPtr  ptr;
  /* must hold two signs, '+', "?" and a token as long as all of buf; the
     former fixed 128 bytes could be overrun by malformed input */
  Char     tmp [sizeof (buf) + 8];
  CharPtr  tokens [6];

  if ( ffstring == NULL || lat_lon == NULL ) return;

  MemSet ((Pointer) tokens, 0, sizeof (tokens));

  StringNCpy_0 (buf, lat_lon, sizeof (buf));

  /* cut buf at each run of blanks, remembering where every token starts;
     i stops at 5 so tokens [5] is the last slot ever written */
  i = 0;
  ptr = buf;
  ch = *ptr;
  tokens [i] = ptr;
  while (ch != '\0' && i < 5) {
    if (ch == ' ') {
      *ptr = '\0';
      ptr++;
      ch = *ptr;
      while (ch == ' ') {
        ptr++;
        ch = *ptr;
      }
      i++;
      tokens [i] = ptr;
    } else {
      ptr++;
      ch = *ptr;
    }
  }

  /* southern latitudes and western longitudes are negative in the URL */
  ptr = tokens [1];
  if (ptr != NULL && *ptr == 'S') {
    ns = "-";
  }
  ptr = tokens [3];
  if (ptr != NULL && *ptr == 'W') {
    ew = "-";
  }

  if (tokens [0] == NULL) {
    tokens [0] = "?";
  }
  if (tokens [2] == NULL) {
    tokens [2] = "?";
  }

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  /* the link goes to Google Maps; the earlier link_lat_lon target with
     lat=/lon= parameters is no longer used */
  FF_Add_NCBI_Base_URL (ffstring, "https://www.google.com/maps/place/");
  sprintf (tmp, "%s%s+%s%s", ns, tokens [0], ew, tokens [2]);
  FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
  FFAddTextToString (ffstring, "\">", lat_lon, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
4326 
/*
 * FF_www_lat_lon - write a lat_lon qualifier value.
 *
 *   ajp       job settings; GetWWW (ajp) decides whether HTML is produced
 *   ffstring  output string being built
 *   lat_lon   qualifier text
 *
 * In www mode a value that IsCorrectLatLonFormat reports as well formed and
 * in range, and that ParseLatLon accepts, is written as a map hyperlink by
 * Do_www_lat_lon.  Anything else is written as plain text.
 */
static void FF_www_lat_lon (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr lat_lon
)

{
  Boolean  fmt_good = FALSE;
  Boolean  lat_good = FALSE;
  FloatHi  latitude = 0.0;
  Boolean  lon_good = FALSE;
  FloatHi  longitude = 0.0;
  Boolean  prec_good = FALSE;

  if (ffstring == NULL || lat_lon == NULL) return;

  if (GetWWW (ajp) && StringDoesHaveText (lat_lon)) {
    IsCorrectLatLonFormat (lat_lon, &fmt_good, &prec_good, &lat_good, &lon_good);
    /* precision is not a criterion here; the parsed numbers are not used,
       ParseLatLon only serves as a final acceptance test */
    if (fmt_good && lat_good && lon_good && ParseLatLon (lat_lon, &latitude, &longitude)) {
      Do_www_lat_lon (ffstring, lat_lon);
      return;
    }
  }

  /* not in www mode, or one of the checks failed: default print */
  FFAddTextToString (ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
}
4359 
FormatSourceFeatBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)4360 NLM_EXTERN CharPtr FormatSourceFeatBlock (
4361   Asn2gbFormatPtr afp,
4362   BaseBlockPtr bbp
4363 )
4364 
4365 {
4366   Boolean            add_period;
4367   IntAsn2gbJobPtr    ajp;
4368   Asn2gbSectPtr      asp;
4369   BioSourcePtr       biop = NULL;
4370   BioseqPtr          bsp;
4371   BioseqSetPtr       bssp;
4372   Char               buf [128], pfx [512], sfx [128];
4373   CharPtr            common = NULL;
4374   Char               currAccVer [SEQID_MAX_LEN];
4375   DbtagPtr           dbt;
4376   SeqMgrDescContext  dcontext;
4377   SeqMgrFeatContext  fcontext;
4378   GBFeaturePtr       gbfeat = NULL;
4379   GBSeqPtr           gbseq;
4380   Int2               i;
4381   IntAsn2gbSectPtr   iasp;
4382   Uint1              idx;
4383   IntSrcBlockPtr     isp;
4384   Boolean            is_desc = TRUE;
4385   Boolean            is_gps = FALSE;
4386   Boolean            is_other = FALSE;
4387   Boolean            is_est_or_gss = FALSE;
4388   Boolean            is_bc;
4389   Boolean            is_rf;
4390   Boolean            is_sc;
4391   Int2               j;
4392   Uint1              jdx;
4393   CharPtr            js = NULL;
4394   Uint1              lastomptype;
4395   Uint1              lastssptype;
4396   SeqLocPtr          location = NULL;
4397   MolInfoPtr         mip;
4398   CharPtr            notestr;
4399   SourceType PNTR    notetbl = NULL;
4400   Boolean            okay;
4401   ObjectIdPtr        oip;
4402   OrgModPtr          omp;
4403   OrgNamePtr         onp = NULL;
4404   OrgRefPtr          orp = NULL;
4405   Boolean            partial5;
4406   Boolean            partial3;
4407   CharPtr            prefix;
4408   PCRReactionPtr     prp;
4409   ValNodePtr         pset;
4410   PcrSetPtr          psp;
4411   SourceType PNTR    qualtbl = NULL;
4412   QualValPtr         qvp;
4413   SeqDescrPtr        sdp = NULL;
4414   SeqEntryPtr        sep;
4415   SeqFeatPtr         sfp = NULL;
4416   SeqIdPtr           sip;
4417   SubSourcePtr       ssp;
4418   CharPtr            str;
4419   BioseqPtr          target;
4420   CharPtr            taxname = NULL;
4421   ValNodePtr         vnp;
4422   StringItemPtr      ffstring, unique;
4423 
4424   if (afp == NULL || bbp == NULL) return NULL;
4425   ajp = afp->ajp;
4426   if (ajp == NULL) return NULL;
4427   asp = afp->asp;
4428   if (asp == NULL) return NULL;
4429   target = asp->target;
4430   bsp = asp->bsp;
4431   if (target == NULL || bsp == NULL) return NULL;
4432   qvp = afp->qvp;
4433   if (qvp == NULL) return NULL;
4434 
4435   pfx [0] = '\0';
4436   sfx [0] = '\0';
4437 
4438   if (ajp->gbseq) {
4439     gbseq = &asp->gbseq;
4440   } else {
4441     gbseq = NULL;
4442   }
4443 
4444   /* five-column feature table uses special code for formatting */
4445 
4446   if (ajp->format == FTABLE_FMT) {
4447     str = FormatFtableSourceFeatBlock (bbp, target);
4448     return str;
4449   }
4450 
4451   /* otherwise do regular flatfile formatting */
4452 
4453   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
4454 
4455   isp = (IntSrcBlockPtr) bbp;
4456 
4457   /* could be descriptor or feature */
4458 
4459   if (bbp->itemtype == OBJ_SEQDESC) {
4460     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
4461     if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
4462       biop = (BioSourcePtr) sdp->data.ptrvalue;
4463     }
4464   } else if (bbp->itemtype == OBJ_SEQFEAT) {
4465     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
4466     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
4467       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
4468     }
4469     is_desc = FALSE;
4470   }
4471 
4472   if (biop == NULL) return NULL;
4473 
4474   unique = FFGetString(ajp);
4475   if ( unique == NULL ) return NULL;
4476 
4477   ffstring = FFGetString(ajp);
4478   if ( ffstring == NULL ) return NULL;
4479 
4480   FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
4481 
4482   /*
4483   for (sip = bsp->id; sip != NULL; sip = sip->next) {
4484     if (sip->choice == SEQID_GI) {
4485       currGi = (BIG_ID) sip->data.intvalue;
4486     }
4487   }
4488   */
4489   currAccVer [0] = '\0';
4490   GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, TRUE);
4491 
4492   iasp = (IntAsn2gbSectPtr) asp;
4493 
4494   if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
4495     if (iasp->feat_key [FEATDEF_BIOSRC] == NULL) {
4496       iasp->feat_key [FEATDEF_BIOSRC] = StringSave ("source");
4497     }
4498   }
4499 
4500   if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
4501       (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
4502     sprintf (pfx, "<span id=\"feature_%s_source_%ld\" class=\"feature\">", currAccVer, (long) isp->source_count);
4503   }
4504 
4505   FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
4506   FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
4507 
4508   if (gbseq != NULL) {
4509     gbfeat = GBFeatureNew ();
4510     if (gbfeat != NULL) {
4511       gbfeat->key = StringSave ("source");
4512     }
4513   }
4514 
4515   location = isp->loc;
4516 
4517   str = FFFlatLoc (ajp, bsp, location, ajp->masterStyle, FALSE);
4518 
4519   /* if multi-interval join remainders for focus after subtraction, switch to order */
4520   if (sdp != NULL && biop != NULL && biop->is_focus && StringStr (str, "join") != NULL) {
4521     FindReplaceString (&str, "join", "order", FALSE, FALSE);
4522   }
4523 
4524   if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans) {
4525     js = AddJsInterval (iasp, pfx, bsp, FEATDEF_BIOSRC, location, currAccVer);
4526   }
4527   if ( GetWWW(ajp) ) {
4528     FF_www_featloc (ffstring, str);
4529   } else {
4530     FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
4531   }
4532   FFAddOneChar(ffstring, '\n', FALSE);
4533 
4534   if (gbseq != NULL) {
4535     if (gbfeat != NULL) {
4536       if (StringDoesHaveText (str)) {
4537         gbfeat->location = StringSave (str);
4538       } else {
4539         gbfeat->location = StringSave ("");
4540       }
4541       if (StringDoesHaveText (str)) {
4542         if (StringStr (str, "join") != NULL) {
4543           gbfeat->operator__ = StringSave ("join");
4544         } else if (StringStr (str, "order") != NULL) {
4545           gbfeat->operator__ = StringSave ("order");
4546         }
4547       }
4548       CheckSeqLocForPartial (location, &partial5, &partial3);
4549       gbfeat->partial5 = partial5;
4550       gbfeat->partial3 = partial3;
4551       if (ajp->masterStyle) {
4552         AddIntervalsToGbfeat (gbfeat, location, bsp);
4553       } else {
4554         AddIntervalsToGbfeat (gbfeat, location, NULL);
4555       }
4556     }
4557   }
4558 
4559   MemFree (str);
4560 
4561   orp = biop->org;
4562   if (orp != NULL) {
4563     taxname = orp->taxname;
4564     /* common = orp->common; */
4565   }
4566   if (StringHasNoText (taxname)) {
4567     if (ajp->flags.needOrganismQual) {
4568       taxname = "unknown";
4569       if (orp != NULL) {
4570         common = orp->common;
4571       }
4572 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
4573     } else {
4574       taxname = "unknown";
4575       common = orp->common;
4576 #endif
4577     }
4578   }
4579 
4580   sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
4581   if (sep != NULL && IS_Bioseq_set (sep)) {
4582     bssp = (BioseqSetPtr) sep->data.ptrvalue;
4583     if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
4584       is_gps = TRUE;
4585     }
4586   }
4587 
4588   if (bsp != NULL) {
4589     for (sip = bsp->id; sip != NULL; sip = sip->next) {
4590       if (sip->choice == SEQID_OTHER) {
4591         is_other = TRUE;
4592       }
4593     }
4594   }
4595 
4596   if (ajp->refseqConventions) {
4597     is_other = TRUE;
4598   }
4599 
4600   /* populate qualifier table from biosource fields */
4601 
4602   qvp [SCQUAL_organism].str = taxname;
4603   qvp [SCQUAL_common_name].str = common;
4604 
4605   if (biop->is_focus) {
4606     qvp [SCQUAL_focus].ble = TRUE;
4607   }
4608 
4609   str = GetMolTypeQual (bsp);
4610   /*
4611   if (StringICmp (str, "ncRNA") == 0) {
4612     str = "other RNA";
4613   }
4614   */
4615   if (str == NULL) {
4616     switch (bsp->mol) {
4617       case Seq_mol_dna :
4618         str = "unassigned DNA";
4619         break;
4620       case Seq_mol_rna :
4621         str = "unassigned RNA";
4622         break;
4623       case Seq_mol_aa :
4624         break;
4625       default :
4626         str = "unassigned DNA";
4627         break;
4628     }
4629   }
4630   qvp [SCQUAL_mol_type].str = str;
4631 
4632   SubSourceToQualArray (biop->subtype, qvp);
4633 
4634   if (orp != NULL) {
4635     onp = orp->orgname;
4636     if (onp != NULL) {
4637       OrgModToQualArray (onp->mod, qvp);
4638     }
4639 
4640     if (! is_desc) {
4641       qvp [SCQUAL_unstructured].vnp = orp->mod;
4642     }
4643     qvp [SCQUAL_db_xref].vnp = orp->db;
4644   }
4645 
4646   if (sfp != NULL) {
4647     qvp [SCQUAL_org_xref].vnp = sfp->dbxref;
4648   }
4649 
4650   /* organelle currently prints /mitochondrion, /virion, etc. */
4651 
4652   qvp [SCQUAL_organelle].num = biop->genome;
4653 
4654   /* some qualifiers are flags in genome and names in subsource, print once with name */
4655 
4656   if (qvp [SCQUAL_ins_seq_name].ssp != NULL &&
4657       qvp [SCQUAL_organelle].num == GENOME_insertion_seq) {
4658     qvp [SCQUAL_organelle].num = 0;
4659   }
4660   if (qvp [SCQUAL_plasmid_name].ssp != NULL &&
4661       qvp [SCQUAL_organelle].num == GENOME_plasmid) {
4662     qvp [SCQUAL_organelle].num = 0;
4663   }
4664   /* AF095904.1
4665   if (qvp [SCQUAL_plastid_name].ssp != NULL &&
4666       qvp [SCQUAL_organelle].num == GENOME_plastid) {
4667     qvp [SCQUAL_organelle].num = 0;
4668   }
4669   */
4670   if (qvp [SCQUAL_transposon_name].ssp != NULL &&
4671       qvp [SCQUAL_organelle].num == GENOME_transposon) {
4672     qvp [SCQUAL_organelle].num = 0;
4673   }
4674 
4675   if (sfp != NULL) {
4676     qvp [SCQUAL_seqfeat_note].str = sfp->comment;
4677   }
4678 
4679   if (qvp [SCQUAL_fwd_primer_name].ssp != NULL ||
4680       qvp [SCQUAL_fwd_primer_seq].ssp != NULL ||
4681       qvp [SCQUAL_rev_primer_name].ssp != NULL ||
4682       qvp [SCQUAL_rev_primer_seq].ssp != NULL) {
4683     qvp [SCQUAL_PCR_primers].ble = TRUE;
4684     qvp [SCQUAL_PCR_primer_note].ble = TRUE;
4685   }
4686 
4687   if (biop->pcr_primers != NULL) {
4688     qvp [SCQUAL_PCR_reaction].prp = biop->pcr_primers;
4689   }
4690 
4691   if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
4692     /* leave metagenome_source as a separate qualifier */
4693   } else {
4694     /* move metagenome_source to note */
4695     qvp [SCQUAL_metagenome_note].omp = qvp [SCQUAL_metagenome_source].omp;
4696     qvp [SCQUAL_metagenome_source].omp = NULL;
4697   }
4698 
4699   if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
4700     if (qvp [SCQUAL_altitude].ssp != NULL) {
4701       ssp = qvp [SCQUAL_altitude].ssp;
4702       if (! AltitudeIsValid (ssp->name)) {
4703         qvp [SCQUAL_altitude].ssp = NULL;
4704       }
4705     }
4706     if (qvp [SCQUAL_type_material].omp != NULL) {
4707       ssp = qvp [SCQUAL_type_material].ssp;
4708       if (! TypeMaterialIsValid (ssp->name)) {
4709         qvp [SCQUAL_type_material].ssp = NULL;
4710       }
4711     }
4712   }
4713 
4714 #if 0
4715   if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
4716     /* leave mating_type as a separate qualifier */
4717   } else if (qvp [SCQUAL_sex].ssp == NULL &&  qvp [SCQUAL_mating_type].ssp != NULL) {
4718     /* move mating_type to sex if available */
4719     qvp [SCQUAL_sex].ssp = qvp [SCQUAL_mating_type].ssp;
4720     qvp [SCQUAL_mating_type].ssp = NULL;
4721   }
4722 #endif
4723 
4724   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4725   if (sdp != NULL) {
4726     mip = (MolInfoPtr) sdp->data.ptrvalue;
4727     if (mip != NULL) {
4728       if (mip->tech == MI_TECH_est || mip->tech == MI_TECH_survey) {
4729         is_est_or_gss = TRUE;
4730       }
4731     }
4732   }
4733 
4734   /* now print qualifiers from table */
4735 
4736   qualtbl = source_qual_order;
4737   if (is_desc) {
4738     notetbl = source_desc_note_order;
4739   } else {
4740     notetbl = source_feat_note_order;
4741   }
4742 
4743   for (i = 0, idx = qualtbl [i]; idx != 0; i++, idx = qualtbl [i]) {
4744 
4745     lastomptype = 0;
4746     lastssptype = 0;
4747     switch (asn2gnbk_source_quals [idx].qualclass) {
4748 
4749       case Qual_class_ignore :
4750         break;
4751 
4752       case Qual_class_string :
4753         if (! StringHasNoText (qvp [idx].str)) {
4754           FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4755                             FALSE, FALSE, TILDE_IGNORE);
4756           FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
4757                             FALSE, FALSE, TILDE_TO_SPACES);
4758           FFAddOneChar(ffstring, '\n', FALSE);
4759         }
4760         break;
4761 
4762       case Qual_class_boolean :
4763         if (qvp [idx].ble) {
4764           FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
4765                             FALSE, TRUE, TILDE_IGNORE);
4766         }
4767         break;
4768 
4769       case Qual_class_organelle :
4770         j = (Int2) qvp [idx].num;
4771         if (j < sizeof (organelleQual) / sizeof (CharPtr)) {
4772           if (organelleQual [j] != NULL) {
4773             FFAddTextToString(ffstring, NULL, organelleQual[j], "\n",
4774                               FALSE, FALSE, TILDE_IGNORE);
4775           }
4776         }
4777         break;
4778 
4779       case Qual_class_orgmod :
4780         omp = qvp [idx].omp;
4781         if (lastomptype == 0 && omp != NULL) {
4782           lastomptype = omp->subtype;
4783         }
4784         while (omp != NULL && omp->subtype == lastomptype) {
4785           if (StringIsJustQuotes (omp->subname)) {
4786             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4787                               FALSE, TRUE, TILDE_IGNORE);
4788           } else if (! StringHasNoText (omp->subname)) {
4789             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4790                               FALSE, TRUE, TILDE_IGNORE);
4791             FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4792                               FALSE, TRUE, TILDE_TO_SPACES);
4793           }
4794           omp = omp->next;
4795         }
4796         break;
4797 
4798       case Qual_class_voucher :
4799         omp = qvp [idx].omp;
4800         if (lastomptype == 0 && omp != NULL) {
4801           lastomptype = omp->subtype;
4802         }
4803         while (omp != NULL && omp->subtype == lastomptype) {
4804           if (StringIsJustQuotes (omp->subname)) {
4805             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4806                               FALSE, TRUE, TILDE_IGNORE);
4807           } else if (! StringHasNoText (omp->subname)) {
4808             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
4809                               FALSE, TRUE, TILDE_IGNORE);
4810             FF_www_specimen_voucher(ajp, ffstring, omp->subname);
4811             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4812           }
4813           omp = omp->next;
4814         }
4815         break;
4816 
4817       case Qual_class_lat_lon :
4818         omp = qvp [idx].omp;
4819         if (lastomptype == 0 && omp != NULL) {
4820           lastomptype = omp->subtype;
4821         }
4822         while (omp != NULL && omp->subtype == lastomptype) {
4823           if (StringIsJustQuotes (omp->subname)) {
4824             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4825                               FALSE, TRUE, TILDE_IGNORE);
4826           } else if (! StringHasNoText (omp->subname)) {
4827             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
4828                               FALSE, TRUE, TILDE_IGNORE);
4829             FF_www_lat_lon(ajp, ffstring, omp->subname);
4830             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4831           }
4832           omp = omp->next;
4833         }
4834         break;
4835 
4836       case Qual_class_subsource :
4837         ssp = qvp [idx].ssp;
4838         if (lastssptype == 0 && ssp != NULL) {
4839           lastssptype = ssp->subtype;
4840         }
4841         while (ssp != NULL && ssp->subtype == lastssptype) {
4842           if (ssp->subtype == SUBSRC_germline ||
4843               ssp->subtype == SUBSRC_rearranged ||
4844               ssp->subtype == SUBSRC_transgenic ||
4845               ssp->subtype == SUBSRC_environmental_sample ||
4846               ssp->subtype == SUBSRC_metagenomic) {
4847             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
4848                               FALSE, TRUE, TILDE_TO_SPACES);
4849           } else if (StringIsJustQuotes (ssp->name)) {
4850             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4851                               FALSE, TRUE, TILDE_IGNORE);
4852           } else if (! StringHasNoText (ssp->name)) {
4853             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4854                               FALSE, TRUE, TILDE_IGNORE);
4855             FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
4856                               FALSE, TRUE, TILDE_TO_SPACES);
4857           }
4858           ssp = ssp->next;
4859         }
4860         break;
4861 
4862       case Qual_class_pcr :
4863         if (qvp [idx].ble) {
4864           lastssptype = 0;
4865           pset = ParsePCRPrimerString (qvp);
4866           for (vnp = pset; vnp != NULL; vnp = vnp->next) {
4867             psp = (PcrSetPtr) vnp->data.ptrvalue;
4868             if (psp == NULL) continue;
4869             str = NextPCRPrimerString (psp, FALSE, (Boolean) (pset->next != NULL));
4870             if (str == NULL) continue;
4871             if (! StringHasNoText (str)) {
4872               FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4873                                 FALSE, TRUE, TILDE_IGNORE);
4874               FFAddTextToString(ffstring, "\"", str, "\"\n",
4875                                 FALSE, TRUE, TILDE_TO_SPACES);
4876             }
4877             MemFree (str);
4878           }
4879           FreePCRSet (pset);
4880         }
4881         break;
4882 
4883       case Qual_class_pcr_react :
4884         prp = qvp [idx].prp;
4885         while (prp != NULL) {
4886           str = NextPCRReaction (prp, FALSE, (Boolean) (prp->next != NULL));
4887           if (StringDoesHaveText (str)) {
4888             FFAddTextToString (ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4889                                FALSE, TRUE, TILDE_IGNORE);
4890             FFAddTextToString (ffstring, "\"", str, "\"\n",
4891                                FALSE, TRUE, TILDE_TO_SPACES);
4892           }
4893           MemFree (str);
4894           prp = prp->next;
4895         }
4896         break;
4897 
4898       case Qual_class_pubset :
4899         break;
4900 
4901       case Qual_class_quote :
4902         break;
4903 
4904       case Qual_class_noquote :
4905         break;
4906 
4907       case Qual_class_label :
4908         break;
4909 
4910       case Qual_class_db_xref :
4911         for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
4912           buf [0] = '\0';
4913           dbt = (DbtagPtr) vnp->data.ptrvalue;
4914           if (dbt != NULL && (! StringHasNoText (dbt->db))) {
4915             oip = dbt->tag;
4916             if (oip != NULL) {
4917 
4918               okay = TRUE;
4919               if (ajp->flags.dropBadDbxref) {
4920                 /* if RELEASE_MODE, drop unknown dbtag */
4921 
4922                 okay = FALSE;
4923                 if (DbxrefIsValid (dbt->db, &is_rf, &is_sc, &is_bc, NULL)) {
4924                   if (is_bc) {
4925                     /* case counts, so suppress if bad case */
4926                   } else if (is_rf && (is_other || is_gps)) {
4927                     /* allow refseq dbxrefs in source feature */
4928                     okay = TRUE;
4929                   } else if (is_sc) {
4930                     /* expect it to be in legalSrcDbXrefs list */
4931                     okay = TRUE;
4932                   } else if (is_est_or_gss) {
4933                     /* EST and GSS records only have source feature, so allow anything */
4934                     okay = TRUE;
4935                   } else {
4936                     /* suppress regular dbxrefs, also warn in validator */
4937                   }
4938                 }
4939 
4940                 /*
4941                 okay = FALSE;
4942                 for (j = 0; legalDbXrefs [j] != NULL; j++) {
4943                   if (StringCmp (dbt->db, legalDbXrefs [j]) == 0) {
4944                     okay = TRUE;
4945                   }
4946                 }
4947                 */
4948               }
4949 
4950               if (okay) {
4951                 if (! StringHasNoText (oip->str)) {
4952                   if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
4953                     sprintf (buf, "%s", oip->str);
4954                   }
4955                 } else {
4956                   sprintf (buf, "%ld", (long) oip->id);
4957                 }
4958               }
4959             }
4960           }
4961           if (StringDoesHaveText (buf) && dbt != NULL) {
4962             FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4963             FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
4964             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4965           }
4966         }
4967         break;
4968 
4969       case Qual_class_illegal :
4970         break;
4971 
4972       case Qual_class_note :
4973         if (! ajp->flags.srcQualsToNote) {
4974 
4975           /* in sequin_mode and dump_mode, all orgmods and subsources show up as separate /qualifiers */
4976 
4977           for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4978 
4979             lastomptype = 0;
4980             lastssptype = 0;
4981             switch (asn2gnbk_source_quals [jdx].qualclass) {
4982 
4983               case Qual_class_orgmod :
4984                 if (jdx == SCQUAL_orgmod_note) break;
4985                 omp = qvp [jdx].omp;
4986                 if (lastomptype == 0 && omp != NULL) {
4987                   lastomptype = omp->subtype;
4988                 }
4989                 while (omp != NULL && omp->subtype == lastomptype) {
4990                   if (StringIsJustQuotes (omp->subname)) {
4991                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4992                               FALSE, TRUE, TILDE_IGNORE);
4993                   } else if (! StringHasNoText (omp->subname)) {
4994                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4995                                       FALSE, TRUE, TILDE_IGNORE);
4996                     FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4997                                       FALSE, TRUE, TILDE_TO_SPACES);
4998                   }
4999                   omp = omp->next;
5000                 }
5001                 break;
5002 
5003               case Qual_class_voucher :
5004                 if (jdx == SCQUAL_orgmod_note) break;
5005                 omp = qvp [jdx].omp;
5006                 if (lastomptype == 0 && omp != NULL) {
5007                   lastomptype = omp->subtype;
5008                 }
5009                 while (omp != NULL && omp->subtype == lastomptype) {
5010                   if (StringIsJustQuotes (omp->subname)) {
5011                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
5012                               FALSE, TRUE, TILDE_IGNORE);
5013                   } else if (! StringHasNoText (omp->subname)) {
5014                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"",
5015                                       FALSE, TRUE, TILDE_IGNORE);
5016                     FF_www_specimen_voucher(ajp, ffstring, omp->subname);
5017                     FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
5018                   }
5019                   omp = omp->next;
5020                 }
5021                 break;
5022 
5023               case Qual_class_subsource :
5024                 if (jdx == SCQUAL_subsource_note) break;
5025                 ssp = qvp [jdx].ssp;
5026                 if (lastssptype == 0 && ssp != NULL) {
5027                   lastssptype = ssp->subtype;
5028                 }
5029                 while (ssp != NULL && ssp->subtype == lastssptype) {
5030                   if (ssp->subtype == SUBSRC_germline ||
5031                       ssp->subtype == SUBSRC_rearranged ||
5032                       ssp->subtype == SUBSRC_transgenic ||
5033                       ssp->subtype == SUBSRC_environmental_sample ||
5034                       ssp->subtype == SUBSRC_metagenomic) {
5035                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "\n",
5036                                       FALSE, TRUE, TILDE_TO_SPACES);
5037                   } else if (StringIsJustQuotes (ssp->name)) {
5038                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
5039                                       FALSE, TRUE, TILDE_IGNORE);
5040 
5041                   } else if (! StringHasNoText (ssp->name)) {
5042                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
5043                                       FALSE, TRUE, TILDE_IGNORE);
5044                     FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
5045                                       FALSE, TRUE, TILDE_TO_SPACES);
5046                   }
5047                   ssp = ssp->next;
5048                 }
5049                 break;
5050 
5051               default :
5052                 break;
5053             }
5054           }
5055         }
5056 
5057         notestr = NULL;
5058         prefix = "";
5059         add_period = FALSE;
5060 
5061         if (biop->genome == 8) {
5062           FFAddTextToString(unique, "", "extrachromosomal", NULL, FALSE, FALSE, TILDE_IGNORE);
5063           prefix = "\n";
5064         }
5065 
5066         for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
5067 
5068           lastomptype = 0;
5069           lastssptype = 0;
5070           switch (asn2gnbk_source_quals [jdx].qualclass) {
5071 
5072             case Qual_class_string :
5073               if (! StringHasNoText (qvp [jdx].str)) {
5074                 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, FALSE);
5075                 add_period = FALSE;
5076                 prefix = "\n";
5077               }
5078               break;
5079 
5080             case Qual_class_orgmod :
5081             case Qual_class_voucher :
5082               if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_orgmod_note) break;
5083               omp = qvp [jdx].omp;
5084               if (lastomptype == 0 && omp != NULL) {
5085                 lastomptype = omp->subtype;
5086               }
5087               while (omp != NULL && omp->subtype == lastomptype) {
5088                 if (! StringHasNoText (omp->subname)) {
5089                   if (jdx == SCQUAL_orgmod_note) {
5090                     sprintf (buf, "%s", prefix);
5091                   } else {
5092                     sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
5093                   }
5094 
5095                   str = StringSave (omp->subname);
5096                   add_period = s_RemovePeriodFromEnd (str);
5097                   if (jdx == SCQUAL_orgmod_note) {
5098                     FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
5099                   } else {
5100                     FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
5101                   }
5102                   MemFree (str);
5103 
5104                   if (jdx == SCQUAL_orgmod_note) {
5105                     if (add_period) {
5106                       prefix = ".\n";
5107                     } else {
5108                       prefix = ";\n";
5109                     }
5110                   } else {
5111                     prefix = "; ";
5112                   }
5113                 }
5114                 omp = omp->next;
5115               }
5116               break;
5117 
5118             case Qual_class_subsource :
5119               if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_subsource_note) break;
5120               ssp = qvp [jdx].ssp;
5121               if (lastssptype == 0 && ssp != NULL) {
5122                 lastssptype = ssp->subtype;
5123               }
5124               while (ssp != NULL && ssp->subtype == lastssptype) {
5125                 if (ssp->subtype == SUBSRC_germline ||
5126                     ssp->subtype == SUBSRC_rearranged ||
5127                     ssp->subtype == SUBSRC_transgenic ||
5128                     ssp->subtype == SUBSRC_environmental_sample ||
5129                     ssp->subtype == SUBSRC_metagenomic) {
5130                   FFAddTextToString (unique, prefix, asn2gnbk_source_quals [jdx].name, NULL, FALSE, FALSE, TILDE_IGNORE);
5131                   prefix = "; ";
5132                 } else if (! StringHasNoText (ssp->name)) {
5133                   if (jdx == SCQUAL_subsource_note) {
5134                     sprintf (buf, "%s", prefix);
5135                   } else {
5136                     sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
5137                   }
5138 
5139                   str = StringSave (ssp->name);
5140                   add_period = s_RemovePeriodFromEnd (str);
5141                   if (jdx == SCQUAL_subsource_note) {
5142                     FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
5143                   } else {
5144                     FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
5145                   }
5146                   MemFree (str);
5147 
5148                   if (jdx == SCQUAL_subsource_note) {
5149                     if (add_period) {
5150                       prefix = ".\n";
5151                     } else {
5152                       prefix = ";\n";
5153                     }
5154                   } else {
5155                     prefix = "; ";
5156                  }
5157                 }
5158                 ssp = ssp->next;
5159               }
5160               break;
5161 
5162             case Qual_class_pcr :
5163               if (qvp [jdx].ble) {
5164                 lastssptype = 0;
5165                 pset = ParsePCRPrimerString (qvp);
5166                 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
5167                   psp = (PcrSetPtr) vnp->data.ptrvalue;
5168                   if (psp == NULL) continue;
5169                   str = NextPCRPrimerString (psp, TRUE, (Boolean) (pset->next != NULL));
5170                   if (str == NULL) continue;
5171                   if (! StringHasNoText (str)) {
5172                     FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5173                     add_period = FALSE;
5174                     prefix = "; ";
5175                   }
5176                   MemFree (str);
5177                 }
5178                 FreePCRSet (pset);
5179               }
5180               break;
5181 
5182             case Qual_class_pcr_react :
5183               prp = qvp [jdx].prp;
5184               while (prp != NULL) {
5185                 str = NextPCRReaction (prp, TRUE, (Boolean) (prp->next != NULL));
5186                 if (StringDoesHaveText (str)) {
5187                   FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5188                   add_period = FALSE;
5189                   prefix = "; ";
5190                 }
5191                 MemFree (str);
5192                 prp = prp->next;
5193               }
5194               break;
5195 
5196             case Qual_class_valnode :
5197               for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
5198                 str = (CharPtr) vnp->data.ptrvalue;
5199                 if (! StringHasNoText (str)) {
5200                   FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5201                   add_period = FALSE;
5202                   prefix = "; ";
5203                 }
5204               }
5205               break;
5206 
5207             default :
5208               break;
5209           }
5210         }
5211         if ( !FFEmpty(unique) ) {
5212           notestr = FFToCharPtr(unique);
5213 
5214           if (add_period) {
5215             s_AddPeriodToEnd (notestr);
5216           }
5217 
5218 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
5219           if (! IsEllipsis (notestr))
5220             s_RemovePeriodFromEnd (notestr);
5221 #endif
5222 
5223           FFAddOneString (ffstring, "/note=\"", FALSE, FALSE, TILDE_IGNORE);
5224           if (is_desc) {
5225             /* AB055064.1 said TILDE_IGNORE on descriptors, but now changing policy */
5226             FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_IGNORE */ /* TILDE_EXPAND */ TILDE_SEMICOLON);
5227           } else {
5228             /* ASZ93724.1 said TILDE_EXPAND on features, but record does not exist */
5229             FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_EXPAND */ TILDE_SEMICOLON);
5230           }
5231           FFAddOneString (ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
5232 
5233           MemFree (notestr);
5234         }
5235         break;
5236       default :
5237         break;
5238     }
5239   }
5240 
5241   /* and then deal with the various note types separately (not in order table) */
5242 
5243   if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
5244       (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
5245     sprintf (sfx, "</span>");
5246   }
5247 
5248   str = NULL;
5249 
5250   if (js != NULL) {
5251     str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", js, sfx);
5252   } else {
5253     str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", pfx, sfx);
5254   }
5255 
5256   MemFree (js);
5257 
5258   /* optionally populate gbseq for XML-ized GenBank format */
5259 
5260   if (gbseq != NULL) {
5261     if (gbfeat != NULL) {
5262       AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
5263     }
5264   }
5265 
5266   FFRecycleString(ajp, unique);
5267   FFRecycleString(ajp, ffstring);
5268   return str;
5269 }
5270 
CountBasesByStream(CharPtr sequence,Pointer userdata)5271 static void LIBCALLBACK CountBasesByStream (
5272   CharPtr sequence,
5273   Pointer userdata
5274 )
5275 
5276 {
5277   Int4Ptr  base_count;
5278   Char     ch;
5279   CharPtr  ptr;
5280 
5281   base_count = (Int4Ptr) userdata;
5282 
5283   ptr = sequence;
5284   ch = *ptr;
5285   while (ch != '\0') {
5286     ch = TO_UPPER (ch);
5287     switch (ch) {
5288       case 'A' :
5289         (base_count [0])++;
5290         break;
5291       case 'C' :
5292         (base_count [1])++;
5293         break;
5294       case 'G' :
5295         (base_count [2])++;
5296         break;
5297       case 'T' :
5298         (base_count [3])++;
5299         break;
5300       default :
5301         (base_count [4])++;
5302         break;
5303     }
5304     ptr++;
5305     ch = *ptr;
5306   }
5307 }
5308 
FormatBasecountBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5309 NLM_EXTERN CharPtr FormatBasecountBlock (
5310   Asn2gbFormatPtr afp,
5311   BaseBlockPtr bbp
5312 )
5313 
5314 {
5315   IntAsn2gbJobPtr  ajp;
5316   Asn2gbSectPtr    asp;
5317   Int4             base_count [5];
5318   BioseqPtr        bsp;
5319   Char             buf [80];
5320   Int2             i;
5321   Int4             len;
5322   StringItemPtr    ffstring;
5323   CharPtr          str;
5324 
5325   if (afp == NULL || bbp == NULL) return NULL;
5326   ajp = afp->ajp;
5327   if (ajp == NULL) return NULL;
5328 
5329   asp = afp->asp;
5330   if (asp == NULL) return NULL;
5331   bsp = (asp->bsp);
5332   if (bsp == NULL) return NULL;
5333 
5334   /* after first formatting, result is cached into bbp->string */
5335 
5336   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
5337 
5338   for (i = 0; i < 5; i++) {
5339     base_count [i] = 0;
5340   }
5341 
5342   if (ajp->ajp.slp != NULL) {
5343     len = SeqLocLen (ajp->ajp.slp);
5344     SeqPortStreamLoc (ajp->ajp.slp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
5345   } else {
5346     len = bsp->length;
5347     SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
5348   }
5349 
5350   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
5351 
5352     if (base_count [4] == 0) {
5353       sprintf (buf, "%7ld a%7ld c%7ld g%7ld t",
5354                (long) base_count [0], (long) base_count [1],
5355                (long) base_count [2], (long) base_count [3]);
5356     } else {
5357       sprintf (buf, "%7ld a%7ld c%7ld g%7ld t%7ld others",
5358                (long) base_count [0], (long) base_count [1],
5359                (long) base_count [2], (long) base_count [3],
5360                (long) base_count [4]);
5361     }
5362 
5363   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
5364 
5365     sprintf (buf, "Sequence %ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
5366              (long) len,
5367              (long) base_count [0], (long) base_count [1],
5368              (long) base_count [2], (long) base_count [3],
5369              (long) base_count [4]);
5370   }
5371 
5372   ffstring = FFGetString(ajp);
5373   if ( ffstring == NULL ) return NULL;
5374 
5375   if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
5376     FFAddOneString(ffstring, "XX\n", FALSE, FALSE, TILDE_IGNORE);
5377   }
5378   FFStartPrint (ffstring, afp->format, 0, 0, "BASE COUNT", 12, 5, 5, "SQ", FALSE);
5379   FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5380   str = FFEndPrint(ajp, ffstring, afp->format, 12, 0, 5, 5, "SQ");
5381   FFRecycleString(ajp, ffstring);
5382 
5383   return str;
5384 }
5385 
/*
 * Appends one line of sequence text to ffstring.
 *
 *   ajp             - job; when non-NULL, in WWW mode and with seqspans
 *                     set, GenBank/GenPept lines are wrapped in a
 *                     <span class="ff_line"> element
 *   ffstring        - output accumulator
 *   format          - GENBANK_FMT/GENPEPT_FMT put the 1-based start
 *                     position on the left; EMBL_FMT/EMBLPEPT_FMT put the
 *                     stop position on the right after padding; any other
 *                     format emits nothing
 *   buf             - residue text; a single trailing space is trimmed
 *                     IN PLACE
 *   gi              - not used by this function
 *   currAccVer      - identifier embedded in the span id (NULL is treated
 *                     as empty)
 *   startwithoutgap - 0-based offset used for the span id
 *   start, stop     - 0-based start (GenBank) and the value printed as the
 *                     end coordinate (EMBL)
 */
static void PrintSeqLine (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  FmtType format,
  CharPtr buf,
  BIG_ID gi,
  CharPtr currAccVer,
  Int4 startwithoutgap,
  Int4 start,
  Int4 stop
)

{
  CharPtr  accver;
  size_t   len;
  Char     pos [16];
  Int4     pad;
  /*
   * 32 literal characters + at most 200 identifier characters (bounded by
   * the %.200s precision below) + up to 11 for the number + NUL.  The
   * previous 64-byte buffer with an unbounded %s could be overrun by a
   * long identifier.
   */
  Char     tmp [256];

  len = StringLen (buf);
  if (len > 0 && buf [len - 1] == ' ') {
    buf [len - 1] = '\0';
  }

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {

    sprintf (pos, "%9ld", (long) (start + 1));
    FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
    FFAddOneChar(ffstring, ' ', FALSE);
    if (ajp != NULL && GetWWW (ajp) && ajp->seqspans) {
      /* passing NULL to %s is undefined, so substitute an empty string */
      accver = (currAccVer != NULL) ? currAccVer : "";
      sprintf (tmp, "<span class=\"ff_line\" id=\"gi_%.200s_%ld\">", accver, (long) (startwithoutgap + 1));
      FFAddOneString(ffstring, tmp, FALSE, FALSE, TILDE_TO_SPACES);
    }
    FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
    if (ajp != NULL && GetWWW (ajp) && ajp->seqspans) {
      FFAddOneString(ffstring, "</span>", FALSE, FALSE, TILDE_TO_SPACES);
    }
    FFAddOneChar(ffstring, '\n', FALSE);
  } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {

    sprintf (pos, "%8ld", (long) (stop));
    FFAddNChar(ffstring, ' ', 5, FALSE);
    FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
    /* pad the residue text so the coordinate lands after column 72 */
    pad = 72 - 5 - StringLen(buf);
    FFAddNChar(ffstring, ' ', pad, FALSE);
    FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
    FFAddOneChar(ffstring, '\n', FALSE);
  }
}
5434 
/*
 * Squeezes every character that is not alphabetic (per IS_ALPHA) out of
 * str, in place, keeping the remaining characters in their original order.
 * Returns str, or NULL when str is NULL or empty.
 */
static CharPtr CompressNonBases (CharPtr str)

{
  Char     ch;
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL || *str == '\0') return NULL;

  wr = str;
  for (rd = str; *rd != '\0'; rd++) {
    ch = *rd;
    if (! IS_ALPHA (ch)) continue;
    *wr = ch;
    wr++;
  }
  *wr = '\0';

  return str;
}
5459 
  /*
   * Per-Seq-id-type ranking table, indexed by Seq-id choice value (see the
   * comment on each entry).  PrintGenome passes it to SeqIdSelect to pick
   * which identifier of a segment is written out.
   * NOTE(review): presumably a lower number means a more preferred id
   * type (gi = 255 would then be least preferred) - confirm against
   * SeqIdSelect.
   * NOTE(review): only 21 entries are listed; if NUM_SEQID is larger, the
   * remaining entries are zero-initialized - confirm that is intended.
   */
  static Uint1 fasta_order [NUM_SEQID] = {
    33, /* 0 = not set */
    20, /* 1 = local Object-id */
    15, /* 2 = gibbsq */
    16, /* 3 = gibbmt */
    30, /* 4 = giim Giimport-id */
    10, /* 5 = genbank */
    10, /* 6 = embl */
    10, /* 7 = pir */
    10, /* 8 = swissprot */
    15, /* 9 = patent */
    10, /* 10 = other = refseq */
    20, /* 11 = general Dbtag */
    255, /* 12 = gi */
    10, /* 13 = ddbj */
    10, /* 14 = prf */
    12, /* 15 = pdb */
    10, /* 16 = tpg */
    10, /* 17 = tpe */
    10, /* 18 = tpd */
    15, /* 19 = gpp */
    15  /* 20 = nat */
  };
5483 
/*
 * Writes the comma-separated list of segment references for a location
 * chain (each as "id:start..stop", wrapped in "complement(...)" for
 * minus-strand pieces, or "gap(...)" for gaps) onto ffstring.
 *
 *   ajp          - job; consulted for WWW mode and hideGI
 *   ffstring     - output accumulator
 *   slp_head     - first location of the chain; only INT, PNT, WHOLE and
 *                  NULL locations are handled, others are skipped
 *   prefix       - if non-NULL, emitted before every non-gap segment
 *   segWithParts - when TRUE, a gi that resolves to a gibbsq/gibbmt/giim
 *                  id of a virtual bioseq is written as a gap
 *   is_na        - selects the nucleotide vs protein link base in WWW mode
 */
static void PrintGenome (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqLocPtr slp_head,
  CharPtr prefix,
  Boolean segWithParts,
  Boolean is_na
)
{
  Char         buf[128], vbuf [128];
  Boolean      first = TRUE;
  SeqIdPtr     freeid = NULL, sid = NULL, newid = NULL;
  SeqLocPtr    slp = NULL;
  Int4         start = 0, stop = 0;
  BIG_ID       gi = 0;
  Char         currAccVer [SEQID_MAX_LEN];
  BioseqPtr    bsp = NULL;

  buf [0] = '\0';
  vbuf [0] = '\0';
  for (slp = slp_head; slp; slp = slp->next) {
    sid = SeqLocId (slp);
    if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT || slp->choice == SEQLOC_WHOLE) {
      start = SeqLocStart (slp);
      stop = SeqLocStop (slp);
    } else if (slp->choice == SEQLOC_NULL) {
      sprintf (vbuf, ",%s", "gap()");
      FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
      continue;
    } else {
      continue;
    }
    if (sid == NULL) {
      continue;
    }

    /* per-segment state; bsp must not carry over from an earlier segment,
       otherwise a later segment would be hyperlinked to the wrong record */

    newid = NULL;
    freeid = NULL;
    bsp = NULL;
    buf [0] = '\0';
    gi = 0;
    if (sid->choice == SEQID_GI) {
      gi = (BIG_ID) sid->data.intvalue;
      if (GetAccnVerFromServer (gi, buf)) {
        /* buf now holds the accession.version text */
      } else {
        newid = GetSeqIdForGI (gi);
        if (newid != NULL) {
          /* this id is released by this function, so remember it */
          freeid = newid;
        }
        if (newid != NULL && segWithParts) {
          if (newid->choice == SEQID_GIBBSQ ||
              newid->choice == SEQID_GIBBMT ||
              newid->choice == SEQID_GIIM) {
            bsp = BioseqFind (newid);
            if (bsp != NULL && bsp->repr == Seq_repr_virtual) {
              if (bsp->length > 0) {
                sprintf (vbuf, ",gap(%ld)", (long) bsp->length);
              } else {
                sprintf (vbuf, ",%s", "gap()");
              }
              FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
              /* release the looked-up id before leaving this iteration;
                 the early exit previously skipped the free below */
              freeid = SeqIdFree (freeid);
              continue;
            }
          }
        }
      }
    } else if (sid->choice == SEQID_GENERAL) {
      newid = sid;
    } else {
      newid = sid;
      /* NOTE(review): the result is not read afterwards; the call is
         kept in case the lookup has side effects - confirm */
      gi = GetGIForSeqId (sid);
    }
    if (prefix != NULL) {
      FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
    }
    if (first) {
      first = FALSE;
    } else {
      FFAddOneChar (ffstring, ',', FALSE);
    }
    if (! StringHasNoText (buf)) {
      /* filled in by GetAccnVerFromServer */
    } else if (newid != NULL) {
      SeqIdWrite (SeqIdSelect (newid, fasta_order, NUM_SEQID),
                 buf, PRINTID_TEXTID_ACC_VER, sizeof(buf) -1 );
    } else if (sid->choice == SEQID_GI) {
      SeqIdWrite (sid, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
    }

    if (SeqLocStrand (slp) == Seq_strand_minus) {
      FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
    }
    currAccVer [0] = '\0';
    if (bsp != NULL) {
      GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, FALSE);
    }
    if ( GetWWW (ajp) && StringDoesHaveText (currAccVer)) {
      if (newid == NULL) {
        newid = sid;
      }
      if (newid->choice != SEQID_GENERAL) {
        FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
        if (is_na) {
          FF_Add_NCBI_Base_URL (ffstring, link_seqn);
        } else {
          FF_Add_NCBI_Base_URL (ffstring, link_seqp);
        }
        FFAddTextToString (ffstring, NULL, currAccVer, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddTextToString (ffstring, NULL, buf, "</a>", FALSE, FALSE, TILDE_IGNORE);
      } else {
        /* no link for general ids, but the identifier itself must still
           be written or the segment would read as just ":start..stop" */
        FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
      }
    } else {
      FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
    }

    /* coordinates are printed 1-based; the closing parenthesis matches
       the "complement(" opened above */

    if (SeqLocStrand (slp) == Seq_strand_minus) {
      sprintf (vbuf,":%ld..%ld)", (long) start+1, (long) stop+1);
    } else {
      sprintf (vbuf,":%ld..%ld", (long) start+1, (long) stop+1);
    }
    FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
    if (freeid != NULL) {
      freeid = SeqIdFree (freeid);
    }
  }
}
5613 
/*
 *  RevCompDelta
 *
 *  Builds a new delta chain that lists the components of seq_ext in
 *  reverse order, for display of the reverse complement of a delta
 *  sequence.
 *
 *    - Seq-loc NULL components are copied as new NULL locations.
 *    - Seq-loc interval components are rebuilt with AddIntervalToLocation;
 *      for an interval not on the minus strand the from/to endpoints are
 *      swapped, and the 5'/3' partial flags are exchanged.
 *    - Seq-loc components of any other type are silently dropped.
 *    - Seq-literal components without sequence data (gaps) are copied
 *      (length and seq_data_type only; fuzz is not copied).
 *    - A NULL literal, or a literal that carries sequence data, cannot be
 *      represented: everything built so far is released and NULL is
 *      returned.
 *
 *  The caller owns the returned chain and must free each node with
 *  DeltaSeqFree.
 */
static DeltaSeqPtr RevCompDelta (
  DeltaSeqPtr seq_ext
)

{
  DeltaSeqPtr  dsp;
  ValNodePtr   head = NULL;
  Int4         from, to, tmp;
  ValNodePtr   next;
  SeqLocPtr    nslp, slp;
  Boolean      partial5, partial3;
  SeqIntPtr    sintp;
  SeqLitPtr    slitp, slip;
  ValNodePtr   vnp;

  for (dsp = seq_ext; dsp != NULL; dsp = dsp->next) {
    vnp = NULL;

    if (dsp->choice == 1) {

      slp = (SeqLocPtr) dsp->data.ptrvalue;
      if (slp != NULL) {

        if (slp->choice == SEQLOC_NULL) {

          nslp = ValNodeAddPointer (NULL, SEQLOC_NULL, NULL);

          vnp = ValNodeAddPointer (NULL, 1, nslp);

        } else if (slp->choice == SEQLOC_INT) {

          sintp = (SeqIntPtr) slp->data.ptrvalue;
          if (sintp != NULL) {
            CheckSeqLocForPartial (slp, &partial5, &partial3);
            from = sintp->from;
            to = sintp->to;
            if (sintp->strand != Seq_strand_minus) {
              tmp = from;
              from = to;
              to = tmp;
            }
            /* partial flags are deliberately passed in swapped order */
            nslp = AddIntervalToLocation (NULL, sintp->id, from, to, partial3, partial5);

            vnp = ValNodeAddPointer (NULL, 1, nslp);

          }
        }
      }

    } else if (dsp->choice == 2) {

      slitp = (SeqLitPtr) dsp->data.ptrvalue;
      if (slitp != NULL && slitp->seq_data == NULL) {
        slip = SeqLitNew ();
        if (slip != NULL) {
          slip->length = slitp->length;
          /* not copying fuzz */
          slip->seq_data_type = slitp->seq_data_type;
          vnp = ValNodeAddPointer (NULL, 2, (Pointer) slip);
        }
      } else {
        /*
         * Give up.  The nodes built so far own their SeqLoc / SeqLit
         * payloads, so release them one node at a time with DeltaSeqFree
         * rather than ValNodeFree, which would free only the list nodes
         * and leak the payloads.
         */
        while (head != NULL) {
          next = head->next;
          head->next = NULL;
          DeltaSeqFree ((DeltaSeqPtr) head);
          head = next;
        }
        return NULL;
      }
    }

    /* save in new list in reverse order */

    if (vnp != NULL) {
      vnp->next = head;
      head = vnp;
    }
  }

  return head;
}
5689 
FormatContigBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5690 NLM_EXTERN CharPtr FormatContigBlock (
5691   Asn2gbFormatPtr afp,
5692   BaseBlockPtr bbp
5693 )
5694 
5695 {
5696   IntAsn2gbJobPtr  ajp;
5697   Asn2gbSectPtr    asp;
5698   BioseqPtr        bsp;
5699   DeltaSeqPtr      delta_head = NULL;
5700   DeltaSeqPtr      dsp;
5701   DeltaSeqPtr      dspnext;
5702   IntFuzzPtr       fuzz;
5703   GBSeqPtr         gbseq;
5704   Boolean          is_na;
5705   SeqLitPtr        litp;
5706   DeltaSeqPtr      new_delta = NULL;
5707   CharPtr          prefix = NULL;
5708   Boolean          rev_comp = FALSE;
5709   Boolean          segWithParts = FALSE;
5710   SeqIntPtr        sintp;
5711   SeqLocPtr        slp;
5712   SeqLocPtr        slp_head = NULL;
5713   CharPtr          str;
5714   Char             tmp [16];
5715   Boolean          unknown;
5716   Char             vbuf [32];
5717   StringItemPtr    ffstring;
5718 /*  CharPtr          label;*/
5719 
5720   if (afp == NULL || bbp == NULL) return NULL;
5721   ajp = afp->ajp;
5722   if (ajp == NULL) return NULL;
5723   asp = afp->asp;
5724   if (asp == NULL) return NULL;
5725   bsp = (asp->bsp);
5726   if (bsp == NULL) return NULL;
5727 
5728   ffstring = FFGetString (ajp);
5729   if ( ffstring == NULL ) return NULL;
5730 
5731   is_na = ISA_na (bsp->mol);
5732 
5733   if (ajp->ajp.slp != NULL) {
5734     slp = ajp->ajp.slp;
5735     if (slp->choice == SEQLOC_INT) {
5736       sintp = (SeqIntPtr) slp->data.ptrvalue;
5737       if (sintp != NULL) {
5738         if (sintp->from == 0 && sintp->to == bsp->length - 1 && sintp->strand == Seq_strand_minus) {
5739           rev_comp = TRUE;
5740         }
5741       }
5742     }
5743   }
5744 
5745   FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE);
5746   /*
5747   if ( GetWWW(ajp) ) {
5748     label = "CONTIG   ";
5749   } else {
5750     label = "CONTIG";
5751   }
5752 
5753   FFAddOneString(ffstring, label,  FALSE, FALSE, TILDE_IGNORE);
5754   FFAddNChar(ffstring, ' ', 12 - StringLen(label), FALSE);
5755   */
5756 
5757   FFAddOneString (ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
5758 
5759   if (bsp->seq_ext_type == 1) {
5760 
5761     if (bsp->repr == Seq_repr_seg && SegHasParts (bsp)) {
5762       segWithParts = TRUE;
5763     }
5764 
5765     slp_head = (SeqLocPtr) bsp->seq_ext;
5766     PrintGenome (ajp, ffstring, slp_head, prefix, segWithParts, is_na);
5767 
5768   } else if (bsp->seq_ext_type == 4) {
5769 
5770     if (rev_comp) {
5771       new_delta = RevCompDelta ((DeltaSeqPtr) bsp->seq_ext);
5772       delta_head = new_delta;
5773     } else {
5774       delta_head = (DeltaSeqPtr) bsp->seq_ext;
5775     }
5776 
5777     for (dsp = delta_head; dsp != NULL; dsp = dsp->next) {
5778       if (dsp->choice == 1) {
5779 
5780         slp_head = (SeqLocPtr) dsp->data.ptrvalue;
5781         PrintGenome (ajp, ffstring, slp_head, prefix, FALSE, is_na);
5782 
5783       } else {
5784 
5785         litp = (SeqLitPtr) dsp->data.ptrvalue;
5786         if (litp != NULL) {
5787           if (litp->seq_data != NULL && litp->seq_data_type != Seq_code_gap) {
5788             if (litp->length == 0) {
5789               sprintf (vbuf, "gap(%ld)", (long) litp->length);
5790               FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
5791             } else {
5792               /* don't know what to do here */
5793             }
5794           } else {
5795             unknown = FALSE;
5796             fuzz = litp->fuzz;
5797             if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
5798               unknown = TRUE;
5799             }
5800             if (unknown && litp->length > 0) {
5801               sprintf (tmp, "unk%ld", (long) litp->length);
5802             } else {
5803               sprintf (tmp, "%ld", (long) litp->length);
5804             }
5805             if (prefix != NULL) {
5806               sprintf (vbuf, "%sgap(%s)", prefix, tmp);
5807             } else {
5808               sprintf (vbuf, "gap(%s)", tmp);
5809             }
5810             FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
5811           }
5812         }
5813       }
5814 
5815       prefix = ",";
5816     }
5817 
5818   } else if (bsp->seq_ext_type == 2) {
5819 
5820     slp = (SeqLocPtr) bsp->seq_ext;
5821     PrintGenome (ajp, ffstring, slp, prefix, segWithParts, is_na);
5822 
5823   }
5824 
5825   FFAddOneChar (ffstring, ')', FALSE);
5826 
5827   str = FFEndPrint (ajp, ffstring, afp->format, 12, 12, 5, 5, "CO");
5828   FFRecycleString (ajp, ffstring);
5829 
5830   /* optionally populate gbseq for XML-ized GenBank format */
5831 
5832   if (ajp->gbseq) {
5833     gbseq = &asp->gbseq;
5834   } else {
5835     gbseq = NULL;
5836   }
5837 
5838   if (gbseq != NULL) {
5839     if (StringLen (str) > 12) {
5840       gbseq->contig = StringSave (str + 12);
5841     } else {
5842       gbseq->contig = StringSave (str);
5843     }
5844 
5845     CleanQualValue (gbseq->contig);
5846     Asn2gnbkCompressSpaces (gbseq->contig);
5847     StripAllSpaces (gbseq->contig);
5848   }
5849 
5850   if (new_delta != NULL) {
5851     dsp = new_delta;
5852     while (dsp != NULL) {
5853       dspnext = dsp->next;
5854       dsp->next = NULL;
5855       DeltaSeqFree (dsp);
5856       dsp = dsp->next;
5857     }
5858   }
5859 
5860   return str;
5861 }
5862 
SaveGBSeqSequence(CharPtr sequence,Pointer userdata)5863 static void LIBCALLBACK SaveGBSeqSequence (
5864   CharPtr sequence,
5865   Pointer userdata
5866 )
5867 
5868 {
5869   CharPtr       tmp;
5870   CharPtr PNTR  tmpp;
5871 
5872   tmpp = (CharPtr PNTR) userdata;
5873   tmp = *tmpp;
5874 
5875   tmp = StringMove (tmp, sequence);
5876 
5877   *tmpp = tmp;
5878 }
5879 
InGapBlock(IntAsn2gbJobPtr ajp)5880 static Boolean InGapBlock (
5881   IntAsn2gbJobPtr ajp
5882 )
5883 
5884 {
5885   return (Boolean) (ajp->seqGapCurrLen > 0);
5886 }
5887 
LineIsAllGaps(CharPtr ptr)5888 static Boolean LineIsAllGaps (
5889   CharPtr ptr
5890 )
5891 
5892 {
5893   Char  ch;
5894   Int2  j;
5895 
5896   for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5897     if (ch != '-') return FALSE;
5898   }
5899   if (j == 60) return TRUE;
5900   return FALSE;
5901 }
5902 
GapAtStart(CharPtr ptr)5903 static Int2 GapAtStart (
5904   CharPtr ptr
5905 )
5906 
5907 {
5908   Char  ch;
5909   Int2  j;
5910 
5911   for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5912     if (ch != '-') return j;
5913   }
5914   return j;
5915 }
5916 
/*
 *  FixGapAtStart
 *
 *  Overwrites the leading run of '-' characters in ptr (at most the
 *  first 60 characters) with the pad character.
 */
static void FixGapAtStart (
  CharPtr ptr,
  Char pad
)

{
  Int2  idx;

  for (idx = 0; idx < 60 && ptr [idx] == '-'; idx++) {
    ptr [idx] = pad;
  }
}
5930 
GapAtEnd(CharPtr ptr)5931 static Int2 GapAtEnd (
5932   CharPtr ptr
5933 )
5934 
5935 {
5936   Char  ch;
5937   Int2  j;
5938   Int2  k;
5939 
5940   for (ch = *ptr, j = 0, k = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5941     if (ch == '-') {
5942       k++;
5943     } else {
5944       k = 0;
5945     }
5946   }
5947   return k;
5948 }
5949 
/*
 *  FixGapAtEnd
 *
 *  Overwrites the trailing run of '-' characters in ptr with the pad
 *  character.  The run is located as StringLen minus GapAtEnd, and
 *  replacement never goes past position 60.
 */
static void FixGapAtEnd (
  CharPtr ptr,
  Char pad
)

{
  Int2  idx;

  idx = StringLen (ptr) - GapAtEnd (ptr);
  while (idx < 60 && ptr [idx] == '-') {
    ptr [idx] = pad;
    idx++;
  }
}
5965 
/*
 *  FixRemainingGaps
 *
 *  Replaces every '-' found in the first 60 characters of ptr (stopping
 *  at the end of the string) with the pad character.
 */
static void FixRemainingGaps (
  CharPtr ptr,
  Char pad
)

{
  Int2  idx;

  for (idx = 0; idx < 60 && ptr [idx] != '\0'; idx++) {
    if (ptr [idx] == '-') {
      ptr [idx] = pad;
    }
  }
}
5981 
/*
 *  ExpandSeqLine
 *
 *  Rewrites buf in place, inserting a space after every 10 characters
 *  as long as fewer than 60 characters have been copied (so no space
 *  follows the 60th character).  The input is first copied to an
 *  80-character scratch array, and buf must have room for the added
 *  spaces.
 */
static void ExpandSeqLine (
  CharPtr buf
)

{
  Int2  group;
  Int2  in;
  Int2  out;
  Char  copy [80];

  StringCpy (copy, buf);

  out = 0;
  group = 0;

  for (in = 0; copy [in] != '\0'; in++) {
    buf [out] = copy [in];
    out++;
    group++;

    /* in + 1 is the number of characters consumed so far on this line */
    if (group >= 10 && in + 1 < 60) {
      buf [out] = ' ';
      out++;
      group = 0;
    }
  }

  buf [out] = '\0';
}
6020 
ProcessGapSpecialFormat(Asn2gbFormatPtr afp,IntAsn2gbJobPtr ajp,BioseqPtr bsp,StringItemPtr ffstring,CharPtr buf,CharPtr nextchars)6021 static Int2 ProcessGapSpecialFormat (
6022   Asn2gbFormatPtr afp,
6023   IntAsn2gbJobPtr ajp,
6024   BioseqPtr bsp,
6025   StringItemPtr ffstring,
6026   CharPtr buf,
6027   CharPtr nextchars
6028 )
6029 
6030 {
6031   Char      fmt_buf [64];
6032   Char      gapbuf [80];
6033   /*
6034   BIG_ID    gi;
6035   Char      gi_buf [32];
6036   */
6037   Char      currAccVer [SEQID_MAX_LEN];
6038   Boolean   is_na;
6039   Char      pad;
6040   Char      rgn_buf [64];
6041   SeqIdPtr  sip;
6042   SeqLocPtr slp;
6043   Int2      startgapgap = 0, endgap = 0;
6044   Int4      from, to;
6045 
6046   is_na = ISA_na (bsp->mol);
6047   if (is_na) {
6048     pad = 'n';
6049   } else {
6050     pad = 'x';
6051   }
6052 
6053   if (LineIsAllGaps (buf)) {
6054     ajp->seqGapCurrLen += StringLen (buf);
6055     *buf = '\0';
6056     return 0;
6057   }
6058 
6059   startgapgap = GapAtStart (buf);
6060   if (InGapBlock (ajp)) {
6061     ajp->seqGapCurrLen += startgapgap;
6062     if (is_na) {
6063       sprintf (gapbuf, "          [gap %ld bp]", (long) ajp->seqGapCurrLen);
6064     } else {
6065       sprintf (gapbuf, "          [gap %ld aa]", (long) ajp->seqGapCurrLen);
6066     }
6067     FFAddOneString (ffstring, gapbuf, FALSE, FALSE, TILDE_TO_SPACES);
6068     if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && afp != NULL &&
6069       (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
6070       /*
6071       gi = 0;
6072       for (sip = bsp->id; sip != NULL; sip = sip->next) {
6073         if (sip->choice == SEQID_GI) {
6074           gi = (BIG_ID) sip->data.intvalue;
6075         }
6076       }
6077       */
6078       currAccVer [0] = '\0';
6079       GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, FALSE);
6080       if (StringDoesHaveText (currAccVer)) {
6081         /* sprintf(gi_buf, "%ld", (long) gi); */
6082         sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
6083         if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
6084           StringCat (fmt_buf, "&report=gbwithparts");
6085           if (ajp->ajp.slp != NULL) {
6086             slp = ajp->ajp.slp;
6087             from = SeqLocStart (slp) + 1;
6088             to = SeqLocStop (slp) + 1;
6089             sprintf (rgn_buf, "&from=%ld&to=%ld", (long) from, (long) to);
6090             StringCat (fmt_buf, rgn_buf);
6091           }
6092         }
6093         FFAddOneString (ffstring, "    <a href=\"", FALSE, FALSE, TILDE_IGNORE);
6094         if (is_na) {
6095           FF_Add_NCBI_Base_URL (ffstring, link_featn);
6096         } else {
6097           FF_Add_NCBI_Base_URL (ffstring, link_featp);
6098         }
6099         FFAddOneString (ffstring, currAccVer, FALSE, FALSE, TILDE_IGNORE);
6100         FFAddOneString (ffstring, fmt_buf, FALSE, FALSE, TILDE_IGNORE);
6101         FFAddOneString (ffstring, "\">Expand Ns", FALSE, FALSE, TILDE_IGNORE);
6102         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
6103       }
6104     }
6105     FFAddOneChar (ffstring, '\n', FALSE);
6106     ajp->seqGapCurrLen = 0;
6107     FixGapAtStart (buf, ' ');
6108   } else if (startgapgap > 0) {
6109     FixGapAtStart (buf, pad);
6110     startgapgap = 0;
6111   }
6112 
6113   endgap = GapAtEnd (buf);
6114   if (LineIsAllGaps (nextchars)) {
6115     FixGapAtEnd (buf, ' ');
6116     ajp->seqGapCurrLen += endgap;
6117   } else if (endgap > 0) {
6118     /*
6119     FixGapAtEnd (buf, pad);
6120     */
6121     FixGapAtEnd (buf, ' ');
6122     ajp->seqGapCurrLen += endgap;
6123   }
6124 
6125   FixRemainingGaps (buf, pad);
6126 
6127   return startgapgap;
6128 }
6129 
6130 /*
6131 static void ChangeOandJtoX (CharPtr str)
6132 
6133 {
6134   Char  ch;
6135 
6136   if (str == NULL) return;
6137   ch = *str;
6138   while (ch != '\0') {
6139     if (ch == 'O' || ch == 'J') {
6140       *str = 'X';
6141     } else if (ch == 'o' || ch == 'j') {
6142       *str = 'x';
6143     }
6144     str++;
6145     ch = *str;
6146   }
6147 }
6148 */
6149 
FormatSequenceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6150 NLM_EXTERN CharPtr FormatSequenceBlock (
6151   Asn2gbFormatPtr afp,
6152   BaseBlockPtr bbp
6153 )
6154 
6155 {
6156   IntAsn2gbJobPtr   ajp;
6157   Asn2gbSectPtr     asp;
6158   Int2              blk;
6159   BioseqPtr         bsp;
6160   Bioseq            bsq;
6161   Char              buf [80];
6162   Char              ch;
6163   Int2              count;
6164   Int4              extend;
6165   StreamFlgType     flags = STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL;
6166   GBSeqPtr          gbseq;
6167   BIG_ID            gi = 0;
6168   IntAsn2gbSectPtr  iasp;
6169   Int2              lin;
6170   SeqLocPtr         loc;
6171   Int4              num;
6172   CharPtr           ptr;
6173   Int4              remaining;
6174   SeqBlockPtr       sbp;
6175   SeqIdPtr          sip;
6176   SeqLoc            sl;
6177   SeqLocPtr         slp;
6178   Int4              start;
6179   Int2              startgapgap;
6180   Int4              stop;
6181   CharPtr           str = NULL;
6182   CharPtr           tmp;
6183   StringItemPtr     ffstring;
6184   Char              currAccVer [SEQID_MAX_LEN];
6185 
6186   if (afp == NULL || bbp == NULL) return NULL;
6187   sbp = (SeqBlockPtr) bbp;
6188   ajp = afp->ajp;
6189   if (ajp == NULL) return NULL;
6190   asp = afp->asp;
6191   if (asp == NULL) return NULL;
6192   iasp = (IntAsn2gbSectPtr) asp;
6193   bsp = (asp->bsp);
6194   if (bsp == NULL) return NULL;
6195 
6196   /* if GBSeq XML, use SeqPortStream on single block */
6197 
6198   if (ajp->gbseq) {
6199     gbseq = &asp->gbseq;
6200 
6201     if (ajp->ajp.slp != NULL) {
6202       slp = ajp->ajp.slp;
6203       str = MemNew (sizeof (Char) * (SeqLocLen (slp) + 10));
6204     } else {
6205       str = MemNew (sizeof (Char) * (bsp->length + 10));
6206     }
6207     if (str == NULL) return NULL;
6208 
6209     tmp = str;
6210     if (ajp->ajp.slp != NULL) {
6211       slp = ajp->ajp.slp;
6212       SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
6213     } else {
6214       SeqPortStream (bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
6215     }
6216     /*
6217     if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
6218       if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6219         ChangeOandJtoX (str);
6220       }
6221     }
6222     */
6223     gbseq->sequence = StringSave (str);
6224 
6225     tmp = gbseq->sequence;
6226     if (tmp == NULL) return NULL;
6227     ch = *tmp;
6228     while (ch != '\0') {
6229       if (ch == '\n' || ch == '\r' || ch == '\t') {
6230         *tmp = ' ';
6231       } else if (IS_UPPER (ch)) {
6232         /* collab decision to present target sequence in lower case */
6233         *tmp = TO_LOWER (ch);
6234       }
6235       tmp++;
6236       ch = *tmp;
6237     }
6238     TrimSpacesAroundString (gbseq->sequence);
6239     CompressNonBases (gbseq->sequence);
6240 
6241     return str;
6242   }
6243 
6244   /* replace SeqPort with improved SeqPortStream */
6245 
6246   if (sbp->bases == NULL) {
6247     if (ajp->specialGapFormat) {
6248       flags = EXPAND_GAPS_TO_DASHES | STREAM_CORRECT_INVAL;
6249     }
6250 
6251     start = sbp->start;
6252     stop = sbp->stop;
6253     extend = sbp->extend;
6254 
6255     if (stop > start) {
6256 
6257       str = MemNew (sizeof (Char) * (extend - start + 3));
6258       if (str != NULL) {
6259         if (ajp->ajp.slp != NULL) {
6260           slp = ajp->ajp.slp;
6261           MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
6262           MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
6263           bsq.repr = Seq_repr_seg;
6264           bsq.mol = bsp->mol;
6265           bsq.seq_ext_type = 1;
6266           bsq.length = SeqLocLen (slp);
6267           bsq.seq_ext = &sl;
6268           if (slp->choice == SEQLOC_MIX || slp->choice == SEQLOC_PACKED_INT) {
6269             loc = (SeqLocPtr) slp->data.ptrvalue;
6270             if (loc != NULL) {
6271               sl.choice = loc->choice;
6272               sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
6273               sl.next = loc->next;
6274             }
6275           } else {
6276             sl.choice = slp->choice;
6277             sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
6278             sl.next = NULL;
6279           }
6280           SeqPortStreamInt (&bsq, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
6281         } else {
6282           num = SeqPortStreamInt (bsp, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
6283           if (num < 1) {
6284             /* flag possible inconsistency between bsp->length and actual sequence data length */
6285             ajp->relModeError = TRUE;
6286             return NULL;
6287           }
6288         }
6289         /*
6290         if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
6291           if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6292             ChangeOandJtoX (str);
6293           }
6294         }
6295         */
6296         sbp->bases = str;
6297       }
6298     }
6299   }
6300 
6301   if (sbp->bases == NULL) return NULL;
6302 
6303   for (sip = bsp->id; sip != NULL; sip = sip->next) {
6304    if (sip->choice != SEQID_GI) continue;
6305    gi = (BIG_ID) sip->data.intvalue;
6306   }
6307 
6308   currAccVer [0] = '\0';
6309   GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, TRUE);
6310 
6311   /* format subsequence cached with SeqPortStream */
6312 
6313   ffstring = FFGetString (ajp);
6314 
6315   start = sbp->start;
6316   stop = sbp->stop;
6317   remaining = stop - start;
6318 
6319   count = 0;
6320   blk = 0;
6321   lin = 0;
6322 
6323   ptr = sbp->bases;
6324   ch = *ptr;
6325 
6326   while (ch != '\0' && remaining > 0) {
6327     buf [count] = (Char) (TO_LOWER (ch));
6328     count++;
6329     remaining--;
6330     ptr++;
6331     ch = *ptr;
6332 
6333     blk++;
6334     lin++;
6335     if (lin >= 60) {
6336 
6337       buf [count] = '\0';
6338       startgapgap = 0;
6339       if (ajp->specialGapFormat) {
6340         startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
6341       }
6342       if (StringDoesHaveText (buf)) {
6343         ExpandSeqLine (buf);
6344         PrintSeqLine (ajp, ffstring, afp->format, buf, gi, currAccVer, start, start + startgapgap, start + lin);
6345       }
6346       count = 0;
6347       blk = 0;
6348       lin = 0;
6349       start += 60;
6350     }
6351   }
6352 
6353   buf [count] = '\0';
6354   if (count > 0) {
6355     startgapgap = 0;
6356     if (ajp->specialGapFormat) {
6357       startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
6358     }
6359     if (StringDoesHaveText (buf)) {
6360       ExpandSeqLine (buf);
6361       PrintSeqLine (ajp, ffstring, afp->format, buf, gi, currAccVer, start, start + startgapgap, start + lin);
6362     }
6363   }
6364 
6365   str = FFToCharPtr(ffstring);
6366 
6367   FFRecycleString (ajp, ffstring);
6368   return str;
6369 }
6370 
6371 /*
6372 static CharPtr insd_strd [4] = {
6373   NULL, "single", "double", "mixed"
6374 };
6375 
6376 static CharPtr insd_mol [10] = {
6377   "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
6378 };
6379 
6380 static CharPtr insd_top [3] = {
6381   NULL, "linear", "circular"
6382 };
6383 */
6384 
6385 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
6386 
FormatSlashBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6387 NLM_EXTERN CharPtr FormatSlashBlock (
6388   Asn2gbFormatPtr afp,
6389   BaseBlockPtr bbp
6390 )
6391 
6392 {
6393   IntAsn2gbJobPtr   ajp;
6394   Asn2gbSectPtr     asp;
6395   GBFeaturePtr      currf, headf, nextf;
6396   GBReferencePtr    currr, headr, nextr;
6397   Uint1             featdeftype;
6398   GBSeqPtr          gbseq, gbtmp;
6399   IntAsn2gbSectPtr  iasp;
6400   IndxPtr           index;
6401   INSDSeq           is;
6402   /*
6403   Int2              moltype, strandedness, topology;
6404   */
6405 
6406   if (afp == NULL || bbp == NULL) return NULL;
6407   ajp = afp->ajp;
6408   if (ajp == NULL) return NULL;
6409   asp = afp->asp;
6410   if (asp == NULL) return NULL;
6411 
6412   iasp = (IntAsn2gbSectPtr) asp;
6413 
6414   /* sort and unique indexes */
6415 
6416   index = ajp->index;
6417 
6418   if (index != NULL) {
6419 
6420     MemCopy (index, &asp->index, sizeof (IndxBlock));
6421     MemSet (&asp->index, 0, sizeof (IndxBlock));
6422 
6423     index->authors = ValNodeSort (index->authors, SortVnpByString);
6424     index->authors = UniqueValNode (index->authors);
6425 
6426     index->genes = ValNodeSort (index->genes, SortVnpByString);
6427     index->genes = UniqueValNode (index->genes);
6428 
6429     index->journals = ValNodeSort (index->journals, SortVnpByString);
6430     index->journals = UniqueValNode (index->journals);
6431 
6432     index->keywords = ValNodeSort (index->keywords, SortVnpByString);
6433     index->keywords = UniqueValNode (index->keywords);
6434 
6435     index->secondaries = ValNodeSort (index->secondaries, SortVnpByString);
6436     index->secondaries = UniqueValNode (index->secondaries);
6437   }
6438 
6439   /* adjust XML-ized GenBank format */
6440 
6441   gbseq = ajp->gbseq;
6442 
6443   if (gbseq != NULL) {
6444 
6445     MemCopy (gbseq, &asp->gbseq, sizeof (GBSeq));
6446     MemSet (&asp->gbseq, 0, sizeof (GBSeq));
6447 
6448     /* reverse order of references */
6449 
6450     headr = NULL;
6451     for (currr = gbseq->references; currr != NULL; currr = nextr) {
6452       nextr = currr->next;
6453       currr->next = headr;
6454       headr = currr;
6455     }
6456     gbseq->references = headr;
6457 
6458     /* reverse order of features */
6459 
6460     headf = NULL;
6461     for (currf = gbseq->feature_table; currf != NULL; currf = nextf) {
6462       nextf = currf->next;
6463       currf->next = headf;
6464       headf = currf;
6465     }
6466     gbseq->feature_table = headf;
6467   }
6468 
6469   /* if generating GBSeq XML/ASN, write at each slash block */
6470 
6471   if (gbseq != NULL && afp->aip != NULL) {
6472     if (ajp->produceInsdSeq) {
6473       MemSet ((Pointer) &is, 0, sizeof (INSDSeq));
6474       is.next = (INSDSeqPtr) gbseq->next;
6475       is.OBbits__ = gbseq->OBbits__;
6476       is.locus = gbseq->locus;
6477       is.length = gbseq->length;
6478       is.strandedness = gbseq->strandedness;
6479       is.moltype = gbseq->moltype;
6480       is.topology = gbseq->topology;
6481       /*
6482       strandedness = (Int2) gbseq->strandedness;
6483       if (strandedness < 0 || strandedness > 3) {
6484         strandedness = 0;
6485       }
6486       is.strandedness = StringSave (insd_strd [strandedness]);
6487       moltype = (Int2) gbseq->moltype;
6488       if (moltype < 0 || moltype > 9) {
6489         moltype = 0;
6490       }
6491       is.moltype = StringSave (insd_mol [moltype]);
6492       topology = (Int2) gbseq->topology;
6493       if (topology < 0 || topology > 2) {
6494         topology = 0;
6495       }
6496       is.topology = StringSave (insd_top [topology]);
6497       */
6498       is.division = gbseq->division;
6499       is.update_date = gbseq->update_date;
6500       is.create_date = gbseq->create_date;
6501       is.update_release = gbseq->update_release;
6502       is.create_release = gbseq->create_release;
6503       is.definition = gbseq->definition;
6504       is.primary_accession = gbseq->primary_accession;
6505       is.entry_version = gbseq->entry_version;
6506       is.accession_version = gbseq->accession_version;
6507       is.other_seqids = gbseq->other_seqids;
6508       is.secondary_accessions = gbseq->secondary_accessions;
6509       is.project = gbseq->project;
6510       is.keywords = gbseq->keywords;
6511       is.segment = gbseq->segment;
6512       is.source = gbseq->source;
6513       is.organism = gbseq->organism;
6514       is.taxonomy = gbseq->taxonomy;
6515       is.references = (INSDReferencePtr) gbseq->references;
6516       is.comment = gbseq->comment;
6517       is.comment_set = (INSDCommentPtr) gbseq->comment_set;
6518       is.struc_comments = (INSDStrucCommentPtr) gbseq->struc_comments;
6519       is.primary = gbseq->primary;
6520       is.source_db = gbseq->source_db;
6521       is.database_reference = gbseq->database_reference;
6522       is.feature_table = (INSDFeaturePtr) gbseq->feature_table;
6523       is.feature_set = (INSDFeatureSetPtr) gbseq->feature_set;
6524       is.sequence = gbseq->sequence;
6525       is.contig = gbseq->contig;
6526       is.alt_seq = (INSDAltSeqDataPtr) gbseq->alt_seq;
6527       is.xrefs = (INSDXrefPtr) gbseq->xrefs;
6528       INSDSeqAsnWrite (&is, afp->aip, afp->atp);
6529     } else {
6530       GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
6531     }
6532     if (afp->atp == NULL) {
6533       AsnPrintNewLine (afp->aip);
6534     }
6535     AsnIoFlush (afp->aip);
6536 
6537     /* clean up gbseq fields */
6538 
6539     gbtmp = GBSeqNew ();
6540     MemCopy (gbtmp, gbseq, sizeof (GBSeq));
6541     MemSet (gbseq, 0, sizeof (GBSeq));
6542     GBSeqFree (gbtmp);
6543   }
6544 
6545   /* then clean up javascript components */
6546 
6547   iasp->gi = MemFree (iasp->gi);
6548   iasp->acc = MemFree (iasp->acc);
6549   for (featdeftype = 0; featdeftype < FEATDEF_MAX; featdeftype++) {
6550     iasp->feat_key [featdeftype] = MemFree (iasp->feat_key [featdeftype]);
6551   }
6552 
6553   /* slash has string pre-allocated by add slash block function */
6554 
6555   return StringSaveNoNull (bbp->string);
6556 }
6557 
6558 
6559