1 /* asn2gnb6.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb6.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.381 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59 #include <findrepl.h>
60 #include <valid.h>
61
62 #ifdef WIN_MAC
63 #if __profile__
64 #include <Profiler.h>
65 #endif
66 #endif
67
68 static CharPtr link_tax = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
69
70 static CharPtr link_featn = "https://www.ncbi.nlm.nih.gov/nuccore/";
71 static CharPtr link_featp = "https://www.ncbi.nlm.nih.gov/protein/";
72
73 static CharPtr link_seqn = "https://www.ncbi.nlm.nih.gov/nuccore/";
74 static CharPtr link_seqp = "https://www.ncbi.nlm.nih.gov/protein/";
75
76 static CharPtr link_lat_lon = "https://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html?";
77
78 static CharPtr link_gold_stamp_id = "http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp=";
79
80 static CharPtr link_annot_soft_ver = "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version";
81
82 static CharPtr link_annot_ver = "https://www.ncbi.nlm.nih.gov/genome/annotation_euk/";
83
84 /* ordering arrays for qualifiers and note components */
85
86 static SourceType source_qual_order [] = {
87 SCQUAL_organism,
88
89 SCQUAL_organelle,
90
91 SCQUAL_mol_type,
92
93 SCQUAL_strain,
94 SCQUAL_sub_strain,
95 SCQUAL_variety,
96 SCQUAL_serotype,
97 SCQUAL_serovar,
98 SCQUAL_cultivar,
99 SCQUAL_isolate,
100 SCQUAL_isolation_source,
101 SCQUAL_spec_or_nat_host,
102 SCQUAL_sub_species,
103
104 SCQUAL_specimen_voucher,
105 SCQUAL_culture_collection,
106 SCQUAL_bio_material,
107
108 SCQUAL_type_material,
109
110 SCQUAL_db_xref,
111 SCQUAL_org_xref,
112
113 SCQUAL_chromosome,
114
115 SCQUAL_segment,
116
117 SCQUAL_map,
118 SCQUAL_clone,
119 SCQUAL_sub_clone,
120 SCQUAL_haplotype,
121 SCQUAL_haplogroup,
122 SCQUAL_sex,
123 SCQUAL_mating_type,
124 SCQUAL_cell_line,
125 SCQUAL_cell_type,
126 SCQUAL_tissue_type,
127 SCQUAL_clone_lib,
128 SCQUAL_dev_stage,
129 SCQUAL_ecotype,
130
131 SCQUAL_germline,
132 SCQUAL_rearranged,
133 SCQUAL_transgenic,
134 SCQUAL_environmental_sample,
135
136 SCQUAL_lab_host,
137 SCQUAL_pop_variant,
138 SCQUAL_tissue_lib,
139
140 SCQUAL_plasmid_name,
141 SCQUAL_transposon_name,
142 SCQUAL_ins_seq_name,
143
144 SCQUAL_country,
145
146 SCQUAL_focus,
147
148 SCQUAL_lat_lon,
149 SCQUAL_altitude,
150 SCQUAL_collection_date,
151 SCQUAL_collected_by,
152 SCQUAL_identified_by,
153 /*
154 SCQUAL_fwd_primer_seq,
155 SCQUAL_rev_primer_seq,
156 SCQUAL_fwd_primer_name,
157 SCQUAL_rev_primer_name,
158 */
159 SCQUAL_PCR_primers,
160 SCQUAL_PCR_reaction,
161
162 SCQUAL_note,
163
164 SCQUAL_sequenced_mol,
165 SCQUAL_label,
166 SCQUAL_usedin,
167 SCQUAL_citation,
168 (SourceType) 0
169 };
170
171 static SourceType source_desc_note_order [] = {
172 SCQUAL_seqfeat_note,
173 SCQUAL_orgmod_note,
174 SCQUAL_subsource_note,
175
176 SCQUAL_metagenomic,
177
178 SCQUAL_linkage_group,
179
180 SCQUAL_type,
181 SCQUAL_sub_type,
182 SCQUAL_serogroup,
183 SCQUAL_pathovar,
184 SCQUAL_chemovar,
185 SCQUAL_biovar,
186 SCQUAL_biotype,
187 SCQUAL_group,
188 SCQUAL_sub_group,
189 SCQUAL_common,
190 SCQUAL_acronym,
191 SCQUAL_dosage,
192
193 SCQUAL_authority,
194 SCQUAL_forma,
195 SCQUAL_forma_specialis,
196 SCQUAL_synonym,
197 SCQUAL_anamorph,
198 SCQUAL_teleomorph,
199 SCQUAL_breed,
200 SCQUAL_frequency,
201
202 SCQUAL_metagenome_source,
203 SCQUAL_metagenome_note,
204
205 SCQUAL_genotype,
206 SCQUAL_plastid_name,
207
208 SCQUAL_endogenous_virus_name,
209
210 SCQUAL_common_name,
211
212 SCQUAL_PCR_primer_note,
213 SCQUAL_PCR_reaction,
214
215 SCQUAL_zero_orgmod,
216 SCQUAL_one_orgmod,
217 SCQUAL_zero_subsrc,
218
219 /* SCQUAL_old_lineage, */
220
221 /* SCQUAL_old_name, */
222 (SourceType) 0
223 };
224
225 static SourceType source_feat_note_order [] = {
226 SCQUAL_unstructured,
227
228 SCQUAL_metagenomic,
229
230 SCQUAL_linkage_group,
231 SCQUAL_mating_type,
232
233 SCQUAL_type,
234 SCQUAL_sub_type,
235 SCQUAL_serogroup,
236 SCQUAL_pathovar,
237 SCQUAL_chemovar,
238 SCQUAL_biovar,
239 SCQUAL_biotype,
240 SCQUAL_group,
241 SCQUAL_sub_group,
242 SCQUAL_common,
243 SCQUAL_acronym,
244 SCQUAL_dosage,
245
246 SCQUAL_authority,
247 SCQUAL_forma,
248 SCQUAL_forma_specialis,
249 SCQUAL_synonym,
250 SCQUAL_anamorph,
251 SCQUAL_teleomorph,
252 SCQUAL_breed,
253 SCQUAL_frequency,
254
255 SCQUAL_metagenome_source,
256 SCQUAL_metagenome_note,
257
258 SCQUAL_genotype,
259 SCQUAL_plastid_name,
260
261 SCQUAL_endogenous_virus_name,
262
263 SCQUAL_seqfeat_note,
264 SCQUAL_orgmod_note,
265 SCQUAL_subsource_note,
266
267 SCQUAL_common_name,
268
269 SCQUAL_PCR_primer_note,
270 SCQUAL_PCR_reaction,
271
272 SCQUAL_zero_orgmod,
273 SCQUAL_one_orgmod,
274 SCQUAL_zero_subsrc,
275
276 /* SCQUAL_old_lineage, */
277
278 /* SCQUAL_old_name, */
279 (SourceType) 0
280 };
281
282 NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
283 { "", Qual_class_ignore },
284 { "acronym", Qual_class_orgmod },
285 { "altitude", Qual_class_subsource },
286 { "anamorph", Qual_class_orgmod },
287 { "authority", Qual_class_orgmod },
288 { "biotype", Qual_class_orgmod },
289 { "biovar", Qual_class_orgmod },
290 { "bio_material", Qual_class_voucher },
291 { "breed", Qual_class_orgmod },
292 { "cell_line", Qual_class_subsource },
293 { "cell_type", Qual_class_subsource },
294 { "chemovar", Qual_class_orgmod },
295 { "chromosome", Qual_class_subsource },
296 { "citation", Qual_class_pubset },
297 { "clone", Qual_class_subsource },
298 { "clone_lib", Qual_class_subsource },
299 { "collected_by", Qual_class_subsource },
300 { "collection_date", Qual_class_subsource },
301 { "common", Qual_class_orgmod },
302 { "common", Qual_class_string },
303 { "country", Qual_class_subsource },
304 { "cultivar", Qual_class_orgmod },
305 { "culture_collection", Qual_class_voucher },
306 { "db_xref", Qual_class_db_xref },
307 { "db_xref", Qual_class_db_xref },
308 { "dev_stage", Qual_class_subsource },
309 { "dosage", Qual_class_orgmod },
310 { "ecotype", Qual_class_orgmod },
311 { "endogenous_virus", Qual_class_subsource },
312 { "environmental_sample", Qual_class_subsource },
313 { "extrachromosomal", Qual_class_boolean },
314 { "focus", Qual_class_boolean },
315 { "forma", Qual_class_orgmod },
316 { "forma_specialis", Qual_class_orgmod },
317 { "frequency", Qual_class_subsource },
318 { "fwd_primer_name", Qual_class_subsource },
319 { "fwd_primer_seq", Qual_class_subsource },
320 { "gb_acronym", Qual_class_orgmod },
321 { "gb_anamorph", Qual_class_orgmod },
322 { "gb_synonym", Qual_class_orgmod },
323 { "genotype", Qual_class_subsource },
324 { "germline", Qual_class_subsource },
325 { "group", Qual_class_orgmod },
326 { "haplogroup", Qual_class_subsource },
327 { "haplotype", Qual_class_subsource },
328 { "identified_by", Qual_class_subsource },
329 { "insertion_seq", Qual_class_subsource },
330 { "isolate", Qual_class_orgmod },
331 { "isolation_source", Qual_class_subsource },
332 { "lab_host", Qual_class_subsource },
333 { "label", Qual_class_label },
334 { "lat_lon", Qual_class_lat_lon },
335 { "linkage_group", Qual_class_subsource },
336 { "macronuclear", Qual_class_boolean },
337 { "map", Qual_class_subsource },
338 { "mating_type", Qual_class_subsource },
339 { "derived from metagenome", Qual_class_orgmod },
340 { "metagenome_source", Qual_class_orgmod },
341 { "metagenomic", Qual_class_subsource },
342 { "mol_type", Qual_class_string },
343 { "note", Qual_class_note },
344 { "old_lineage", Qual_class_orgmod },
345 { "old_name", Qual_class_orgmod },
346 { "organism", Qual_class_string },
347 { "organelle", Qual_class_organelle },
348 { "orgmod_note", Qual_class_orgmod },
349 { "pathovar", Qual_class_orgmod },
350 { "PCR_primers", Qual_class_pcr },
351 { "PCR_primers", Qual_class_pcr },
352 { "PCR_primers", Qual_class_pcr_react },
353 { "phenotype", Qual_class_subsource },
354 { "plasmid", Qual_class_subsource },
355 { "plastid", Qual_class_subsource },
356 { "pop_variant", Qual_class_subsource },
357 { "rearranged", Qual_class_subsource },
358 { "rev_primer_name", Qual_class_subsource },
359 { "rev_primer_seq", Qual_class_subsource },
360 { "segment", Qual_class_subsource },
361 { "seqfeat_note", Qual_class_string },
362 { "sequenced_mol", Qual_class_quote },
363 { "serogroup", Qual_class_orgmod },
364 { "serotype", Qual_class_orgmod },
365 { "serovar", Qual_class_orgmod },
366 { "sex", Qual_class_subsource },
367 { "host", Qual_class_orgmod },
368 { "specimen_voucher", Qual_class_voucher },
369 { "strain", Qual_class_orgmod },
370 { "sub_clone", Qual_class_subsource },
371 { "subgroup", Qual_class_orgmod },
372 { "sub_species", Qual_class_orgmod },
373 { "sub_strain", Qual_class_orgmod },
374 { "subtype", Qual_class_orgmod },
375 { "subsource_note", Qual_class_subsource },
376 { "synonym", Qual_class_orgmod },
377 { "teleomorph", Qual_class_orgmod },
378 { "tissue_lib", Qual_class_subsource },
379 { "tissue_type", Qual_class_subsource },
380 { "transgenic", Qual_class_subsource },
381 { "transposon", Qual_class_subsource },
382 { "type", Qual_class_orgmod },
383 { "type_material", Qual_class_orgmod },
384 { "unstructured", Qual_class_valnode },
385 { "usedin", Qual_class_quote },
386 { "variety", Qual_class_orgmod },
387 { "whole_replicon", Qual_class_subsource },
388 { "?", Qual_class_orgmod },
389 { "?", Qual_class_orgmod },
390 { "?", Qual_class_subsource }
391 };
392
393 NLM_EXTERN SourceType subSourceToSourceIdx [45] = {
394 SCQUAL_zero_subsrc,
395 SCQUAL_chromosome,
396 SCQUAL_map,
397 SCQUAL_clone,
398 SCQUAL_sub_clone,
399 SCQUAL_haplotype,
400 SCQUAL_genotype,
401 SCQUAL_sex,
402 SCQUAL_cell_line,
403 SCQUAL_cell_type,
404 SCQUAL_tissue_type,
405 SCQUAL_clone_lib,
406 SCQUAL_dev_stage,
407 SCQUAL_frequency,
408 SCQUAL_germline,
409 SCQUAL_rearranged,
410 SCQUAL_lab_host,
411 SCQUAL_pop_variant,
412 SCQUAL_tissue_lib,
413 SCQUAL_plasmid_name,
414 SCQUAL_transposon_name,
415 SCQUAL_ins_seq_name,
416 SCQUAL_plastid_name,
417 SCQUAL_country,
418 SCQUAL_segment,
419 SCQUAL_endogenous_virus_name,
420 SCQUAL_transgenic,
421 SCQUAL_environmental_sample,
422 SCQUAL_isolation_source,
423 SCQUAL_lat_lon,
424 SCQUAL_collection_date,
425 SCQUAL_collected_by,
426 SCQUAL_identified_by,
427 SCQUAL_fwd_primer_seq,
428 SCQUAL_rev_primer_seq,
429 SCQUAL_fwd_primer_name,
430 SCQUAL_rev_primer_name,
431 SCQUAL_metagenomic,
432 SCQUAL_mating_type,
433 SCQUAL_linkage_group,
434 SCQUAL_haplogroup,
435 SCQUAL_whole_replicon,
436 SCQUAL_phenotype,
437 SCQUAL_altitude,
438 SCQUAL_subsource_note
439 };
440
441 /* ********************************************************************** */
442
443 /* ********************************************************************** */
444
445 /* format functions allocate printable string for given paragraph */
446
447 /* superset of https://www.ncbi.nlm.nih.gov/collab/db_xref.html and RefSeq db_xrefs */
448
449 NLM_EXTERN CharPtr legalDbXrefs [] = {
450 "AceView/WormGenes",
451 "AFTOL",
452 "AntWeb",
453 "APHIDBASE",
454 "ApiDB",
455 "ApiDB_CryptoDB",
456 "ApiDB_PlasmoDB",
457 "ApiDB_ToxoDB",
458 "Araport",
459 "ASAP",
460 "ATCC",
461 "ATCC(in host)",
462 "ATCC(dna)",
463 "Axeldb",
464 "BDGP_EST",
465 "BDGP_INS",
466 "BEEBASE",
467 "BEETLEBASE",
468 "BEI",
469 "BGD",
470 "BOLD",
471 "CDD",
472 "CGD",
473 "CK",
474 "COG",
475 "dbClone",
476 "dbCloneLib",
477 "dbEST",
478 "dbProbe",
479 "dbSNP",
480 "dbSTS",
481 "dictyBase",
482 "DSM",
483 "DSMZ",
484 "EcoGene",
485 "ENSEMBL",
486 "EnsemblGenomes",
487 "EnsemblGenomes-Gn",
488 "EnsemblGenomes-Tr",
489 "ERIC",
490 "ESTLIB",
491 "FANTOM_DB",
492 "FBOL",
493 "FLYBASE",
494 "GABI",
495 "GDB",
496 "GeneDB",
497 "GeneID",
498 "GO",
499 "GOA",
500 "Greengenes",
501 "GRIN",
502 "H-InvDB",
503 "HGNC",
504 "HMP",
505 "HOMD",
506 "HSSP",
507 "I5KNAL",
508 "IKMC",
509 "IMGT/GENE-DB",
510 "IMGT/HLA",
511 "IMGT/LIGM",
512 "InterimID",
513 "InterPro",
514 "IntrepidBio",
515 "IRD",
516 "ISD",
517 "ISFinder",
518 "ISHAM-ITS",
519 "JCM",
520 "JGIDB",
521 "LocusID",
522 "MaizeGDB",
523 "MedGen",
524 "MGI",
525 "MIM",
526 "miRBase",
527 "MycoBank",
528 "NBRC",
529 "NextDB",
530 "niaEST",
531 "NMPDR",
532 "NRESTdb",
533 "OrthoMCL",
534 "Osa1",
535 "Pathema",
536 "PBmice",
537 "PDB",
538 "PFAM",
539 "PGN",
540 "Phytozome",
541 "PIR",
542 "PomBase",
543 "PSEUDO",
544 "PseudoCap",
545 "RAP-DB",
546 "RATMAP",
547 "RFAM",
548 "RGD",
549 "RiceGenes",
550 "RZPD",
551 "SEED",
552 "SGD",
553 "SGN",
554 "SoyBase",
555 "SRPDB",
556 "SubtiList",
557 "TAIR",
558 "taxon",
559 "TIGRFAM",
560 "TubercuList",
561 "UniGene",
562 "UNILIB",
563 "UniProtKB/Swiss-Prot",
564 "UniProtKB/TrEMBL",
565 "UniSTS",
566 "UNITE",
567 "VBASE2",
568 "VectorBase",
569 "Vega",
570 "VGNC",
571 "ViPR",
572 "VISTA",
573 "WorfDB",
574 "WormBase",
575 "Xenbase",
576 "ZFIN",
577 NULL
578 };
579
580 NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
581 "AFTOL",
582 "AntWeb",
583 "ATCC",
584 "ATCC(dna)",
585 "ATCC(in host)",
586 "BEI",
587 "BOLD",
588 "DSM",
589 "DSMZ",
590 "FANTOM_DB",
591 "FBOL",
592 "FLYBASE",
593 "Fungorum",
594 "Greengenes",
595 "GRIN",
596 "HMP",
597 "HOMD",
598 "IKMC",
599 "IMGT/HLA",
600 "IMGT/LIGM",
601 "ISHAM-ITS",
602 "JCM",
603 "MGI",
604 "MycoBank",
605 "NBRC",
606 "RBGE_garden",
607 "RBGE_herbarium",
608 "RZPD",
609 "taxon",
610 "UNILIB",
611 "UNITE",
612 NULL
613 };
614
615 NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
616 "BioProject",
617 "BioSample",
618 "CCDS",
619 "CGNC",
620 "CloneID",
621 "CollecTF",
622 "ECOCYC",
623 "GenBank",
624 "HPM",
625 "HPRD",
626 "LRG",
627 "NASONIABASE",
628 "PBR",
629 "REBASE",
630 "RefSeq",
631 "SK-FST",
632 "VBRC",
633 NULL
634 };
635
IsDbxrefInList(CharPtr name,CharPtr PNTR list,size_t num,BoolPtr badcapP,CharPtr PNTR goodcapP)636 static Boolean IsDbxrefInList (
637 CharPtr name,
638 CharPtr PNTR list,
639 size_t num,
640 BoolPtr badcapP,
641 CharPtr PNTR goodcapP
642 )
643
644 {
645 Int2 L, R, mid;
646
647 L = 0;
648 R = num;
649
650 while (L < R) {
651 mid = (L + R) / 2;
652 if (StringICmp (list [mid], name) < 0) {
653 L = mid + 1;
654 } else {
655 R = mid;
656 }
657 }
658
659 if (StringICmp (list [R], name) == 0) {
660 if (StringCmp (list [R], name) != 0) {
661 if (badcapP != NULL) {
662 *badcapP = TRUE;
663 }
664 if (goodcapP != NULL) {
665 *goodcapP = list [R];
666 }
667 }
668 return TRUE;
669 }
670
671 return FALSE;
672 }
673
DbxrefIsValid(CharPtr name,BoolPtr is_refseq_P,BoolPtr is_source_P,BoolPtr is_badcap_P,CharPtr PNTR goodcapP)674 NLM_EXTERN Boolean DbxrefIsValid (
675 CharPtr name,
676 BoolPtr is_refseq_P,
677 BoolPtr is_source_P,
678 BoolPtr is_badcap_P,
679 CharPtr PNTR goodcapP
680 )
681
682 {
683 if (is_refseq_P != NULL) {
684 *is_refseq_P = FALSE;
685 }
686 if (is_source_P != NULL) {
687 *is_source_P = FALSE;
688 }
689 if (is_badcap_P != NULL) {
690 *is_badcap_P = FALSE;
691 }
692 if (goodcapP != NULL) {
693 *goodcapP = NULL;
694 }
695
696 if (StringHasNoText (name)) return FALSE;
697
698 if (IsDbxrefInList (name, legalRefSeqDbXrefs,
699 sizeof (legalRefSeqDbXrefs) / sizeof (legalRefSeqDbXrefs [0]) - 1,
700 is_badcap_P, goodcapP)) {
701 if (is_refseq_P != NULL) {
702 *is_refseq_P = TRUE;
703 }
704 return TRUE;
705 }
706
707 if (IsDbxrefInList (name, legalSrcDbXrefs,
708 sizeof (legalSrcDbXrefs) / sizeof (legalSrcDbXrefs [0]) - 1,
709 is_badcap_P, goodcapP)) {
710 if (is_source_P != NULL) {
711 *is_source_P = TRUE;
712 }
713 return TRUE;
714 }
715
716 if (IsDbxrefInList (name, legalDbXrefs,
717 sizeof (legalDbXrefs) / sizeof (legalDbXrefs [0]) - 1,
718 is_badcap_P, goodcapP)) {
719 return TRUE;
720 }
721
722 return FALSE;
723 }
724
725
726 /* These functions are for testing dbxrefs */
727
MakeDbxrefList(void)728 static ValNodePtr MakeDbxrefList (void)
729 {
730 ValNodePtr dbxref_list = NULL;
731 Int4 i;
732 DbtagPtr dbtag;
733
734 for (i = 0; legalDbXrefs [i] != NULL; i++) {
735 dbtag = DbtagNew ();
736 dbtag->db = StringSave (legalDbXrefs [i]);
737 dbtag->tag = ObjectIdNew ();
738 dbtag->tag->id = 42;
739 ValNodeAddPointer (&dbxref_list, 0, dbtag);
740 }
741
742 /* legalSrcDbXrefs is contained within legalDbXrefs */
743
744 for (i = 0; legalRefSeqDbXrefs [i] != NULL; i++) {
745 dbtag = DbtagNew ();
746 dbtag->db = StringSave (legalRefSeqDbXrefs [i]);
747 dbtag->tag = ObjectIdNew ();
748 dbtag->tag->id = 42;
749 ValNodeAddPointer (&dbxref_list, 0, dbtag);
750 }
751
752 return dbxref_list;
753 }
754
/*
 * AddDbxrefsToBioSource
 *
 * Appends the list built by MakeDbxrefList to biop->org->db, creating
 * the OrgRef first when the BioSource has none.  Does nothing when biop
 * is NULL or the OrgRef cannot be allocated.
 */
static void AddDbxrefsToBioSource (BioSourcePtr biop)
{
  if (biop == NULL) return;

  if (biop->org == NULL) {
    biop->org = OrgRefNew ();
    /* allocation failed: there is nothing to attach the list to */
    if (biop->org == NULL) return;
  }

  ValNodeLink (&(biop->org->db), MakeDbxrefList ());
}
765
/*
 * AddDbxrefsToSeqFeat
 *
 * Appends the list built by MakeDbxrefList to sfp->dbxref.  A NULL
 * feature is ignored.
 */
static void AddDbxrefsToSeqFeat (SeqFeatPtr sfp)
{
  if (sfp != NULL) {
    ValNodeLink (&(sfp->dbxref), MakeDbxrefList ());
  }
}
771
AddAllDbxrefsToBioseq(BioseqPtr bsp)772 NLM_EXTERN void AddAllDbxrefsToBioseq (BioseqPtr bsp)
773 {
774 SeqDescrPtr sdp;
775 SeqFeatPtr sfp;
776 SeqMgrDescContext dcontext;
777 SeqMgrFeatContext fcontext;
778
779 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
780 if (sdp != NULL) {
781 AddDbxrefsToBioSource (sdp->data.ptrvalue);
782 }
783
784 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
785 if (sfp != NULL) {
786 AddDbxrefsToBioSource (sfp->data.value.ptrvalue);
787 AddDbxrefsToSeqFeat (sfp);
788 }
789
790 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
791 if (sfp != NULL) {
792 AddDbxrefsToSeqFeat (sfp);
793 }
794
795 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
796 if (sfp != NULL) {
797 AddDbxrefsToSeqFeat (sfp);
798 }
799 }
800
801
802
803 static CharPtr organellePrefix [] = {
804 NULL,
805 NULL,
806 "Chloroplast ",
807 "Chromoplast ",
808 "Kinetoplast ",
809 "Mitochondrion ",
810 "Plastid ",
811 NULL,
812 NULL,
813 NULL,
814 NULL,
815 NULL,
816 "Cyanelle ",
817 NULL,
818 NULL,
819 "Nucleomorph ",
820 "Apicoplast ",
821 "Leucoplast ",
822 "Proplastid ",
823 NULL,
824 "Hydrogenosome ",
825 NULL,
826 "Chromatophore "
827 };
828
829 static CharPtr newOrganellePrefix [] = {
830 NULL,
831 NULL,
832 "chloroplast ",
833 "chromoplast ",
834 "kinetoplast ",
835 "mitochondrion ",
836 "plastid ",
837 NULL,
838 NULL,
839 NULL,
840 NULL,
841 NULL,
842 "cyanelle ",
843 NULL,
844 NULL,
845 "nucleomorph ",
846 "apicoplast ",
847 "leucoplast ",
848 "proplastid ",
849 NULL,
850 "hydrogenosome ",
851 NULL,
852 "chromatophore "
853 };
854
FormatSourceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)855 NLM_EXTERN CharPtr FormatSourceBlock (
856 Asn2gbFormatPtr afp,
857 BaseBlockPtr bbp
858 )
859
860 {
861 CharPtr acr = NULL;
862 Boolean addPeriod = TRUE;
863 IntAsn2gbJobPtr ajp;
864 CharPtr ana = NULL;
865 Asn2gbSectPtr asp;
866 BioSourcePtr biop = NULL;
867 CharPtr com = NULL;
868 CharPtr common = NULL;
869 SeqMgrDescContext dcontext;
870 SeqMgrFeatContext fcontext;
871 CharPtr gbacr = NULL;
872 CharPtr gbana = NULL;
873 GBBlockPtr gbp = NULL;
874 GBSeqPtr gbseq;
875 CharPtr gbsyn = NULL;
876 Uint1 genome;
877 CharPtr met = NULL;
878 ValNodePtr mod = NULL;
879 Int2 numacr = 0;
880 Int2 numana = 0;
881 Int2 numcom = 0;
882 Int2 numgbacr = 0;
883 Int2 numgbana = 0;
884 Int2 numgbsyn = 0;
885 Int2 nummet = 0;
886 Int2 numsyn = 0;
887 OrgModPtr omp = NULL;
888 OrgNamePtr onp;
889 CharPtr organelle = NULL;
890 OrgRefPtr orp;
891 CharPtr prefix = " (";
892 SeqDescrPtr sdp;
893 CharPtr second = NULL;
894 SeqFeatPtr sfp;
895 CharPtr str;
896 CharPtr syn = NULL;
897 CharPtr taxname = NULL;
898 StringItemPtr ffstring, temp;
899
900 if (afp == NULL || bbp == NULL) return NULL;
901 ajp = afp->ajp;
902 if (ajp == NULL) return NULL;
903 asp = afp->asp;
904 if (asp == NULL) return NULL;
905
906 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
907
908 ffstring = FFGetString(ajp);
909 if ( ffstring == NULL ) return NULL;
910
911 if (bbp->itemtype == OBJ_SEQDESC) {
912 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
913 if (sdp != NULL) {
914 if (dcontext.seqdesctype == Seq_descr_source) {
915 biop = (BioSourcePtr) sdp->data.ptrvalue;
916 } else if (dcontext.seqdesctype == Seq_descr_genbank) {
917 gbp = (GBBlockPtr) sdp->data.ptrvalue;
918 }
919 }
920 } else if (bbp->itemtype == OBJ_SEQFEAT) {
921 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
922 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
923 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
924 }
925 }
926 if (gbp != NULL) {
927 common = gbp->source;
928 }
929
930 if (biop != NULL) {
931 genome = biop->genome;
932 if (genome <= 22) {
933 if (ajp->newSourceOrg && (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT)) {
934 organelle = newOrganellePrefix [genome];
935 } else {
936 organelle = organellePrefix [genome];
937 }
938 }
939 orp = biop->org;
940 if (orp != NULL) {
941 taxname = orp->taxname;
942 common = orp->common;
943 mod = orp->mod;
944 onp = orp->orgname;
945 if (onp != NULL) {
946
947 if (ajp->newSourceOrg) {
948 for (omp = onp->mod; omp != NULL; omp = omp->next) {
949 switch (omp->subtype) {
950 case ORGMOD_common :
951 com = omp->subname;
952 numcom++;
953 break;
954 case ORGMOD_acronym :
955 acr = omp->subname;
956 numacr++;
957 break;
958 case ORGMOD_synonym :
959 syn = omp->subname;
960 numsyn++;
961 break;
962 case ORGMOD_anamorph :
963 ana = omp->subname;
964 numana++;
965 break;
966 case ORGMOD_gb_acronym :
967 gbacr = omp->subname;
968 numgbacr++;
969 break;
970 case ORGMOD_gb_anamorph :
971 gbana = omp->subname;
972 numgbana++;
973 break;
974 case ORGMOD_gb_synonym :
975 gbsyn = omp->subname;
976 numgbsyn++;
977 break;
978 case ORGMOD_metagenome_source :
979 met = omp->subname;
980 nummet++;
981 break;
982 default :
983 break;
984 }
985 }
986
987 if (numacr > 1) {
988 acr = NULL;
989 }
990 if (numana > 1) {
991 ana = NULL;
992 }
993 if (numcom > 1) {
994 com = NULL;
995 }
996 if (nummet > 1) {
997 met = NULL;
998 }
999 if (numsyn > 1) {
1000 syn = NULL;
1001 }
1002 if (numgbacr > 1) {
1003 gbacr = NULL;
1004 }
1005 if (numgbana > 1) {
1006 gbana = NULL;
1007 }
1008 if (numgbsyn > 1) {
1009 gbsyn = NULL;
1010 }
1011
1012 if (StringHasNoText (second)) {
1013 second = met;
1014 }
1015 if (StringHasNoText (second)) {
1016 second = syn;
1017 }
1018 if (StringHasNoText (second)) {
1019 second = acr;
1020 }
1021 if (StringHasNoText (second)) {
1022 if (StringDoesHaveText (ana)) {
1023 second = ana;
1024 prefix = " (anamorph: ";
1025 }
1026 }
1027 if (StringHasNoText (second)) {
1028 second = com;
1029 }
1030
1031 if (StringHasNoText (second)) {
1032 second = gbsyn;
1033 }
1034 if (StringHasNoText (second)) {
1035 second = gbacr;
1036 }
1037 if (StringHasNoText (second)) {
1038 if (StringDoesHaveText (gbana)) {
1039 second = gbana;
1040 prefix = " (anamorph: ";
1041 }
1042 }
1043 }
1044 }
1045 if (StringHasNoText (second)) {
1046 second = common;
1047 }
1048 }
1049 }
1050
1051 /* If the organelle prefix is already on the */
1052 /* name, don't add it. */
1053
1054 if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0)
1055 organelle = "";
1056
1057 if (StringHasNoText (common)) {
1058 common = taxname;
1059 }
1060 if (StringHasNoText (common)) {
1061 common = "Unknown.";
1062 }
1063 if (StringHasNoText (taxname)) {
1064 taxname = "Unknown.";
1065 }
1066
1067 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1068
1069 temp = FFGetString(ajp);
1070
1071 if (ajp->newSourceOrg) {
1072
1073 if (! StringHasNoText (organelle)) {
1074 FFAddTextToString(temp, NULL, organelle, NULL, FALSE, FALSE, TILDE_IGNORE);
1075 }
1076 FFAddTextToString(temp, NULL, taxname, NULL, FALSE, FALSE, TILDE_IGNORE);
1077 if (! StringHasNoText (second)) {
1078 FFAddTextToString(temp, prefix, second, ")", FALSE, FALSE, TILDE_IGNORE);
1079 }
1080 addPeriod = FALSE;
1081
1082 } else {
1083 FFAddTextToString(temp, NULL, common, NULL, FALSE, FALSE, TILDE_IGNORE);
1084 while (mod != NULL) {
1085 str = (CharPtr) mod->data.ptrvalue;
1086 if (! StringHasNoText (str)) {
1087 FFAddTextToString(temp, " ", str, NULL, FALSE, FALSE, TILDE_IGNORE);
1088 }
1089 mod = mod->next;
1090 }
1091 }
1092
1093 str = FFToCharPtr(temp);
1094 if (StringCmp (str, ".") == 0) {
1095 str = MemFree (str);
1096 }
1097 FFRecycleString(ajp, temp);
1098 /* optionally populate gbseq for XML-ized GenBank format */
1099
1100 if (ajp->gbseq) {
1101 gbseq = &asp->gbseq;
1102 } else {
1103 gbseq = NULL;
1104 }
1105
1106 if (gbseq != NULL) {
1107 gbseq->source = StringSave (str);
1108 }
1109
1110
1111 FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1112 if (str != NULL) {
1113 FFAddTextToString(ffstring, NULL, str, NULL, addPeriod, FALSE, TILDE_TO_SPACES);
1114 } else {
1115 FFAddOneChar(ffstring, '.', FALSE);
1116 }
1117
1118 MemFree (str);
1119
1120 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1121
1122 FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1123 FFAddTextToString(ffstring, NULL, taxname, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1124 if ( StringICmp(taxname, common) != 0 ) {
1125 FFAddTextToString(ffstring, " (", common, ")", FALSE, FALSE, TILDE_TO_SPACES);
1126 }
1127 }
1128
1129 str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 0, 5, "OS");
1130 FFRecycleString(ajp, ffstring);
1131 return str;
1132 }
1133
FormatOrganismBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)1134 NLM_EXTERN CharPtr FormatOrganismBlock (
1135 Asn2gbFormatPtr afp,
1136 BaseBlockPtr bbp
1137 )
1138
1139 {
1140 IntAsn2gbJobPtr ajp;
1141 Asn2gbSectPtr asp;
1142 BioSourcePtr biop = NULL;
1143 Char ch;
1144 CharPtr common = NULL;
1145 DbtagPtr dbt;
1146 SeqMgrDescContext dcontext;
1147 SeqMgrFeatContext fcontext;
1148 GBSeqPtr gbseq;
1149 Uint1 genome;
1150 CharPtr lineage = NULL;
1151 ObjectIdPtr oip;
1152 OrgModPtr omp;
1153 OrgNamePtr onp;
1154 CharPtr organelle = NULL;
1155 OrgRefPtr orp;
1156 SeqDescrPtr sdp;
1157 SeqFeatPtr sfp;
1158 CharPtr str;
1159 Int4 taxid = -1;
1160 CharPtr taxname = NULL;
1161 CharPtr tmp;
1162 CharPtr ptr;
1163 ValNodePtr vnp;
1164 StringItemPtr ffstring, temp;
1165 Char buf [16];
1166
1167 if (afp == NULL || bbp == NULL) return NULL;
1168 ajp = afp->ajp;
1169 if (ajp == NULL) return NULL;
1170 asp = afp->asp;
1171 if (asp == NULL) return NULL;
1172
1173
1174 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
1175
1176 if (bbp->itemtype == OBJ_SEQDESC) {
1177 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1178 if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
1179 biop = (BioSourcePtr) sdp->data.ptrvalue;
1180 }
1181 } else if (bbp->itemtype == OBJ_SEQFEAT) {
1182 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1183 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
1184 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1185 }
1186 }
1187 if (biop != NULL) {
1188 genome = biop->genome;
1189 if (genome <= 22) {
1190 organelle = organellePrefix [genome];
1191 }
1192 orp = biop->org;
1193 if (orp != NULL) {
1194 taxname = orp->taxname;
1195 common = orp->common;
1196 onp = orp->orgname;
1197 if (onp != NULL) {
1198 lineage = onp->lineage;
1199 if (StringHasNoText (lineage)) {
1200 for (omp = onp->mod; omp != NULL; omp = omp->next) {
1201 if (omp->subtype == ORGMOD_old_lineage) {
1202 lineage = omp->subname;
1203 }
1204 }
1205 }
1206 }
1207 for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
1208 dbt = (DbtagPtr) vnp->data.ptrvalue;
1209 if (dbt == NULL) continue;
1210 if (StringCmp (dbt->db, "taxon") == 0) {
1211 oip = dbt->tag;
1212 if (oip != NULL) {
1213 taxid = oip->id;
1214 }
1215 }
1216 }
1217 }
1218 }
1219
1220 /* If the organelle prefix is already on the */
1221 /* name, don't add it. */
1222
1223 if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
1224 organelle = "";
1225
1226 if (StringHasNoText (common)) {
1227 common = taxname;
1228 }
1229 if (StringHasNoText (common)) {
1230 common = "Unknown.";
1231 }
1232 if (StringHasNoText (taxname)) {
1233 taxname = "Unknown.";
1234 }
1235 if (StringHasNoText (lineage)) {
1236 lineage = "Unclassified.";
1237 }
1238
1239 ffstring = FFGetString(ajp);
1240 temp = FFGetString(ajp);
1241 if ( ffstring == NULL || temp == NULL ) return NULL;
1242
1243 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1244
1245 FFStartPrint(temp, afp->format, 2, 12, "ORGANISM", 12, 5, 5, "OC", FALSE);
1246 if (! ajp->newSourceOrg) {
1247 FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1248 }
1249 if (StringNICmp (taxname, "Unknown", 7) != 0) {
1250 if ( GetWWW(ajp) ) {
1251 if (taxid != -1) {
1252 FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1253 FF_Add_NCBI_Base_URL (temp, link_tax);
1254 FFAddOneString(temp, "id=", FALSE, FALSE, TILDE_IGNORE);
1255 sprintf (buf, "%ld", (long) taxid);
1256 FFAddOneString(temp, buf, FALSE, FALSE, TILDE_IGNORE);
1257 FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1258 } else {
1259 FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1260 FF_Add_NCBI_Base_URL (temp, link_tax);
1261 FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE);
1262 tmp = StringSave (taxname);
1263 if (tmp != NULL) {
1264 ptr = tmp;
1265 ch = *ptr;
1266 while (ch != '\0') {
1267 if (IS_WHITESP (ch)) {
1268 *ptr = '+';
1269 }
1270 ptr++;
1271 ch = *ptr;
1272 }
1273 FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE);
1274 MemFree (tmp);
1275 }
1276 FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1277 }
1278 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1279 FFAddOneString(temp, "</a>", FALSE, FALSE, TILDE_IGNORE);
1280 } else {
1281 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1282 }
1283 } else {
1284 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1285 }
1286 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1287 FFRecycleString(ajp, temp);
1288
1289 temp = FFGetString(ajp);
1290 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1291 FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1292 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1293 FFRecycleString(ajp, temp);
1294 /* optionally populate gbseq for XML-ized GenBank format */
1295
1296 if (ajp->gbseq) {
1297 gbseq = &asp->gbseq;
1298 } else {
1299 gbseq = NULL;
1300 }
1301
1302 if (gbseq != NULL) {
1303 temp = FFGetString(ajp);
1304 if (! ajp->newSourceOrg) {
1305 FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1306 }
1307 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1308 gbseq->organism = FFToCharPtr(temp);
1309 gbseq->taxonomy = StringSave (lineage);
1310 FFRecycleString(ajp, temp);
1311 }
1312
1313 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1314 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1315 FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1316 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
1317 FFRecycleString(ajp, temp);
1318 if ( !StringHasNoText(organelle) ) {
1319 temp = FFGetString(ajp);
1320 if ( temp != NULL ) {
1321 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE);
1322 FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1323 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
1324 FFRecycleString(ajp, temp);
1325 }
1326 }
1327 }
1328
1329 str = FFToCharPtr(ffstring);
1330 FFRecycleString(ajp, ffstring);
1331 return str;
1332 }
1333
1334 /* A tilde is not an EOL if it is found in a string of the form: */
1335 /* /~alpahnumdot/ where alphanumdot is either alpha numeric or '.' */
1336 /* */
1337 /* str points to the tilde in question. */
IsTildeEOL(CharPtr str)1338 static Boolean IsTildeEOL(CharPtr str) {
1339 CharPtr ptr;
1340
1341 if ( *(str - 1) != '/' ) return TRUE;
1342
1343 ++str;
1344
1345
1346 for ( ptr = str;
1347 IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
1348 ++ptr) continue;
1349
1350 return *ptr == '/' ? FALSE : TRUE;
1351 }
1352
1353 /* returns a pointer to the first character past the url */
FindUrlEnding(CharPtr str)1354 static CharPtr FindUrlEnding(CharPtr str) {
1355 CharPtr ptr;
1356
1357 for ( ptr = str;
1358 !IS_WHITESP(*ptr) && *ptr != '\0' && *ptr != '(' && *ptr != '\"';
1359 ++ptr ) {
1360 if ( *ptr == '~' ) {
1361 if ( IsTildeEOL(ptr) ) break;
1362 }
1363 }
1364
1365 --ptr;
1366
1367 /* back up over any trailing periods, commas, or parentheses */
1368 while ( (*ptr == '.') || (*ptr == ',') || (*ptr == ')') ) {
1369 --ptr;
1370 }
1371
1372 ++ptr;
1373
1374 return ptr;
1375 }
1376
CommentHasSuspiciousHtml(IntAsn2gbJobPtr ajp,CharPtr searchString)1377 NLM_EXTERN Boolean CommentHasSuspiciousHtml (
1378 IntAsn2gbJobPtr ajp,
1379 CharPtr searchString
1380 )
1381
1382 {
1383 Char ch;
1384 CharPtr ptr;
1385 Int4 state;
1386 ValNodePtr matches;
1387
1388 if (StringHasNoText (searchString)) return FALSE;
1389
1390 state = 0;
1391 ptr = searchString;
1392 ch = *ptr;
1393
1394 while (ch != '\0') {
1395 matches = NULL;
1396 ch = TO_LOWER (ch);
1397 state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
1398 if (matches != NULL) {
1399 return TRUE;
1400 }
1401 ptr++;
1402 ch = *ptr;
1403 }
1404
1405 return FALSE;
1406 }
1407
AddCommentWithURLlinks(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr prefix,CharPtr str,CharPtr suffix)1408 NLM_EXTERN void AddCommentWithURLlinks (
1409 IntAsn2gbJobPtr ajp,
1410 StringItemPtr ffstring,
1411 CharPtr prefix,
1412 CharPtr str,
1413 CharPtr suffix
1414 )
1415
1416 {
1417 Char ch;
1418 CharPtr ptr;
1419
1420 if (GetWWW (ajp) && CommentHasSuspiciousHtml (ajp, str)) {
1421 if (prefix != NULL) {
1422 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1423 }
1424 AddCommentStringWithTildes (ffstring, str);
1425 if (suffix != NULL) {
1426 FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1427 }
1428 return;
1429 }
1430
1431 /*
1432 if (GetWWW (ajp)) {
1433 str = EncodeXmlEx (str);
1434 }
1435 */
1436
1437 while (! StringHasNoText (str)) {
1438 ptr = StringStr (str, "http://");
1439 if (ptr == NULL) {
1440 ptr = StringStr (str, "https://");
1441 }
1442 if (ptr == NULL) {
1443 if (prefix != NULL) {
1444 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1445 }
1446 AddCommentStringWithTildes (ffstring, str);
1447 if (suffix != NULL) {
1448 FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1449 }
1450 return;
1451 }
1452
1453 *ptr = '\0';
1454 AddCommentStringWithTildes (ffstring, str);
1455 *ptr = 'h';
1456
1457 str = ptr;
1458 ptr = FindUrlEnding(str);
1459
1460
1461 ch = *ptr;
1462 *ptr = '\0';
1463 if ( GetWWW(ajp) ) {
1464 FFAddTextToString(ffstring, "<a href=\"", str, "\">", FALSE, FALSE, TILDE_IGNORE);
1465 FFAddTextToString(ffstring, NULL, str, "</a>", FALSE, FALSE, TILDE_IGNORE);
1466 } else {
1467 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
1468 }
1469
1470 *ptr = ch;
1471 str = ptr;
1472 }
1473 }
1474
/* Line-wraps the contents of ffstring into a scratch string and returns */
/* the result as a newly allocated character string (via FFToCharPtr).   */
/*                                                                         */
/* GenBank/GenPept use the gb_* indents and a width of ASN2FF_GB_MAX - 12; */
/* every other format uses the eb_* indents, a width of                   */
/* ASN2FF_EMBL_MAX - 5 and eb_line_prefix.                                */
/*                                                                         */
/* Returns NULL when ajp or ffstring is NULL or no scratch string could   */
/* be obtained.  ffstring itself is not recycled here.                    */
static CharPtr StrucCommentFFEndPrint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  FmtType format,
  Int2 gb_init_indent,
  Int2 gb_cont_indent,
  Int2 eb_init_indent,
  Int2 eb_cont_indent,
  CharPtr eb_line_prefix
)
{
  CharPtr        result;
  StringItemPtr  temp;

  /* validate before touching the string pool, so that a NULL job is    */
  /* never handed to FFGetString and no scratch string is left behind   */
  if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;

  temp = FFGetString (ajp);
  if (temp == NULL) return NULL;

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {
    FFLineWrap (ajp, temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX - 12, NULL);
  } else {
    FFLineWrap (ajp, temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX - 5, eb_line_prefix);
  }

  result = FFToCharPtr (temp);
  FFRecycleString (ajp, temp);
  return result;
}
1500
/* Returns the widest first column (field label length) for which a      */
/* structured comment is still laid out in aligned columns.  Beyond this */
/* width the columnar layout starts to look ugly, so callers switch to a */
/* different layout.  The value is 45, capped by the usable line width   */
/* of the given format.                                                   */
static size_t ThresholdForStructuredCommentColumnarDisplay (
  FmtType format
)
{
  const size_t MAX_COLUMN_WIDTH = 45;

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {
    return MIN( MAX_COLUMN_WIDTH, ASN2FF_GB_MAX - 12 );
  }

  /* every other format is measured against the EMBL line width */
  return MIN( MAX_COLUMN_WIDTH, ASN2FF_EMBL_MAX - 5 );
}
1520
GetStrForStructuredComment(IntAsn2gbJobPtr ajp,UserObjectPtr uop)1521 NLM_EXTERN CharPtr GetStrForStructuredComment (
1522 IntAsn2gbJobPtr ajp,
1523 UserObjectPtr uop
1524 )
1525
1526 {
1527 Char buf [132];
1528 Char ch;
1529 UserFieldPtr curr;
1530 StringItemPtr ffstring;
1531 CharPtr field;
1532 ValNodePtr head = NULL;
1533 size_t len;
1534 CharPtr link_annot_tmp;
1535 size_t max = 0;
1536 ObjectIdPtr oip;
1537 CharPtr prefix = NULL;
1538 CharPtr provider = NULL;
1539 CharPtr ptr;
1540 CharPtr status = NULL;
1541 CharPtr str;
1542 CharPtr suffix = NULL;
1543 CharPtr tmp;
1544
1545 if (ajp == NULL || uop == NULL) return NULL;
1546 if ((oip = uop->type) == NULL) return NULL;
1547 if (StringCmp (oip->str, "StructuredComment") != 0) return NULL;
1548
1549 ffstring = FFGetString (ajp);
1550 if (ffstring == NULL) return NULL;
1551
1552 for (curr = uop->data; curr != NULL; curr = curr->next) {
1553 if (curr->choice != 1) continue;
1554 oip = curr->label;
1555 if (oip == NULL) continue;
1556 field = oip->str;
1557 if (StringHasNoText (field)) continue;
1558 if (StringCmp (field, "StructuredCommentPrefix") == 0) {
1559 str = (CharPtr) curr->data.ptrvalue;
1560 if (StringDoesHaveText (str)) {
1561 prefix = str;
1562 }
1563 continue;
1564 }
1565 if (StringCmp (field, "StructuredCommentSuffix") == 0) {
1566 str = (CharPtr) curr->data.ptrvalue;
1567 if (StringDoesHaveText (str)) {
1568 suffix = str;
1569 }
1570 continue;
1571 }
1572 if (StringCmp (field, "Annotation Provider") == 0) {
1573 str = (CharPtr) curr->data.ptrvalue;
1574 if (StringDoesHaveText (str)) {
1575 provider = str;
1576 }
1577 } else if (StringCmp (field, "Annotation Status") == 0) {
1578 str = (CharPtr) curr->data.ptrvalue;
1579 if (StringDoesHaveText (str)) {
1580 status = str;
1581 }
1582 }
1583 len = StringLen (field);
1584 if (len > max) {
1585 max = len;
1586 }
1587 }
1588
1589 if (StringHasNoText (prefix)) {
1590 prefix = "##Metadata-START##";
1591 }
1592 if (StringHasNoText (suffix)) {
1593 suffix = "##Metadata-END##";
1594 }
1595
1596 if (StringDoesHaveText (prefix)) {
1597 tmp = (CharPtr) MemNew (StringLen (prefix) + 4);
1598 if (tmp != NULL) {
1599 sprintf (tmp, "%s\n", prefix);
1600 ValNodeAddStr (&head, 0, tmp);
1601 }
1602 }
1603 if (max > ThresholdForStructuredCommentColumnarDisplay (ajp->format)) {
1604 for (curr = uop->data; curr != NULL; curr = curr->next) {
1605 if (curr->choice != 1) continue;
1606 oip = curr->label;
1607 if (oip == NULL) continue;
1608 field = oip->str;
1609 if (StringHasNoText (field)) continue;
1610 if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1611 if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1612 str = (CharPtr) curr->data.ptrvalue;
1613 if (StringHasNoText (str)) continue;
1614 ValNodeCopyStr (&head, 0, field);
1615 /*
1616 ValNodeCopyStr (&head, 0, " ");
1617 */
1618 ValNodeCopyStr (&head, 0, " :: ");
1619 ValNodeCopyStr (&head, 0, str);
1620 ValNodeCopyStr (&head, 0, "\n");
1621 }
1622 } else {
1623 for (curr = uop->data; curr != NULL; curr = curr->next) {
1624 if (curr->choice != 1) continue;
1625 oip = curr->label;
1626 if (oip == NULL) continue;
1627 field = oip->str;
1628 if (StringHasNoText (field)) continue;
1629 if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1630 if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1631 str = (CharPtr) curr->data.ptrvalue;
1632 if (StringHasNoText (str)) continue;
1633 len = max + StringLen (str) + 4;
1634 /*
1635 FFStartPrint (ffstring, GENBANK_FMT, 0, max + 1, field, max + 1, 0, max + 1, field, TRUE);
1636 */
1637 StringNCpy_0 (buf, field, sizeof (buf) - 40);
1638 StringCat (buf, " ");
1639 buf [max + 1] = ':';
1640 buf [max + 2] = ':';
1641 buf [max + 3] = '\0';
1642 FFStartPrint (ffstring, GENBANK_FMT, 0, max + 4, buf, max + 4, 0, max + 4, buf, TRUE);
1643
1644 if (GetWWW (ajp) && StringCmp (field, "GOLD Stamp ID") == 0 && StringNCmp (str, "Gi", 2) == 0) {
1645 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1646 FF_Add_NCBI_Base_URL (ffstring, link_gold_stamp_id);
1647 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1648 /* FFAddOneString (ffstring, ".html", FALSE, FALSE, TILDE_EXPAND); */
1649 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1650 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1651 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1652
1653 } else if (GetWWW (ajp) &&
1654 StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
1655 StringCmp (field, "Annotation Software Version") == 0) {
1656 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1657 FF_Add_NCBI_Base_URL (ffstring, link_annot_soft_ver);
1658 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1659 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1660 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1661 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1662
1663 } else if (GetWWW (ajp) &&
1664 StringCmp (prefix, "##Genome-Annotation-Data-START##") == 0 &&
1665 StringCmp (field, "Annotation Version") == 0 &&
1666 StringCmp (provider, "NCBI") == 0 &&
1667 StringCmp (status, "Full annotation") == 0) {
1668 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1669 FF_Add_NCBI_Base_URL (ffstring, link_annot_ver);
1670 link_annot_tmp = StringSave (str);
1671 if (link_annot_tmp != NULL) {
1672 ptr = StringStr (link_annot_tmp, " Annotation Release ");
1673 if (ptr != NULL) {
1674 *ptr = '\0';
1675 StringCat (link_annot_tmp, "/");
1676 ptr += 20;
1677 StringCat (link_annot_tmp, ptr);
1678 ptr = link_annot_tmp;
1679 ch = *ptr;
1680 while (ch != '\0') {
1681 if (ch == ' ') {
1682 *ptr = '_';
1683 }
1684 ptr++;
1685 ch = *ptr;
1686 }
1687 }
1688 FFAddOneString (ffstring, link_annot_tmp, FALSE, FALSE, TILDE_EXPAND);
1689 MemFree (link_annot_tmp);
1690 }
1691 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1692 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1693 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1694
1695 } else if (GetWWW (ajp) && StringCmp (field, "url") == 0) {
1696 AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1697 } else if (GetWWW (ajp) && StringNICmp (str, "http://", 7) == 0) {
1698 AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1699 } else if (GetWWW (ajp) && StringNICmp (str, "https://", 8) == 0) {
1700 AddCommentWithURLlinks (ajp, ffstring, NULL, str, NULL);
1701 } else {
1702 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
1703 }
1704 /*
1705 FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
1706 */
1707 /*
1708 tmp = StrucCommentFFEndPrint (ajp, ffstring, ajp->format, max + 1, max + 1, 0, max + 1, NULL);
1709 */
1710 tmp = StrucCommentFFEndPrint (ajp, ffstring, ajp->format, max + 4, max + 4, 0, max + 4, NULL);
1711 ValNodeCopyStr (&head, 0, tmp);
1712 MemFree (tmp);
1713 FFRecycleString (ajp, ffstring);
1714 ffstring = FFGetString (ajp);
1715 /*
1716 tmp = (CharPtr) MemNew (len);
1717 if (tmp == NULL) continue;
1718 StringCpy (tmp, field);
1719 len = StringLen (tmp);
1720 while (len < max) {
1721 tmp [len] = ' ';
1722 len++;
1723 }
1724 tmp [len] = '\0';
1725 StringCat (tmp, " ");
1726 StringCat (tmp, str);
1727 StringCat (tmp, "\n");
1728 ValNodeCopyStr (&head, 0, tmp);
1729 MemFree (tmp);
1730 */
1731 }
1732 }
1733 if (StringDoesHaveText (suffix)) {
1734 tmp = (CharPtr) MemNew (StringLen (suffix) + 4);
1735 if (tmp != NULL) {
1736 sprintf (tmp, "%s\n", suffix);
1737 ValNodeAddStr (&head, 0, tmp);
1738 }
1739 }
1740
1741 if (head == NULL) return NULL;
1742
1743 str = MergeFFValNodeStrs (head);
1744 ValNodeFreeData (head);
1745
1746 FFRecycleString (ajp, ffstring);
1747
1748 return str;
1749 }
1750
GetStructuredCommentTable(IntAsn2gbJobPtr ajp,UserObjectPtr uop)1751 static CharPtr GetStructuredCommentTable (
1752 IntAsn2gbJobPtr ajp,
1753 UserObjectPtr uop
1754 )
1755
1756 {
1757 UserFieldPtr curr;
1758 CharPtr field;
1759 ValNodePtr head = NULL;
1760 ObjectIdPtr oip;
1761 CharPtr prefix = NULL;
1762 CharPtr str;
1763 CharPtr suffix = NULL;
1764
1765 if (ajp == NULL || uop == NULL) return NULL;
1766 if ((oip = uop->type) == NULL) return NULL;
1767 if (StringCmp (oip->str, "StructuredComment") != 0) return NULL;
1768
1769 for (curr = uop->data; curr != NULL; curr = curr->next) {
1770 if (curr->choice != 1) continue;
1771 oip = curr->label;
1772 if (oip == NULL) continue;
1773 field = oip->str;
1774 if (StringHasNoText (field)) continue;
1775 if (StringCmp (field, "StructuredCommentPrefix") == 0) {
1776 str = (CharPtr) curr->data.ptrvalue;
1777 if (StringDoesHaveText (str)) {
1778 prefix = str;
1779 }
1780 continue;
1781 }
1782 if (StringCmp (field, "StructuredCommentSuffix") == 0) {
1783 str = (CharPtr) curr->data.ptrvalue;
1784 if (StringDoesHaveText (str)) {
1785 suffix = str;
1786 }
1787 continue;
1788 }
1789 }
1790
1791 if (StringHasNoText (prefix)) {
1792 prefix = "##Metadata-START##";
1793 }
1794 if (StringHasNoText (suffix)) {
1795 suffix = "##Metadata-END##";
1796 }
1797
1798 if (StringDoesHaveText (prefix)) {
1799 ValNodeCopyStr (&head, 0, prefix);
1800 if (ajp->oldXmlPolicy) {
1801 ValNodeCopyStr (&head, 0, "\n");
1802 } else {
1803 ValNodeCopyStr (&head, 0, "\\n");
1804 }
1805 }
1806
1807 for (curr = uop->data; curr != NULL; curr = curr->next) {
1808 if (curr->choice != 1) continue;
1809 oip = curr->label;
1810 if (oip == NULL) continue;
1811 field = oip->str;
1812 if (StringHasNoText (field)) continue;
1813 if (StringCmp (field, "StructuredCommentPrefix") == 0) continue;
1814 if (StringCmp (field, "StructuredCommentSuffix") == 0) continue;
1815 str = (CharPtr) curr->data.ptrvalue;
1816 if (StringHasNoText (str)) continue;
1817 ValNodeCopyStr (&head, 0, field);
1818 if (ajp->oldXmlPolicy) {
1819 ValNodeCopyStr (&head, 0, "\t");
1820 } else {
1821 ValNodeCopyStr (&head, 0, "\\t");
1822 }
1823 ValNodeCopyStr (&head, 0, str);
1824 if (ajp->oldXmlPolicy) {
1825 ValNodeCopyStr (&head, 0, "\n");
1826 } else {
1827 ValNodeCopyStr (&head, 0, "\\n");
1828 }
1829 }
1830
1831 if (StringDoesHaveText (suffix)) {
1832 ValNodeCopyStr (&head, 0, suffix);
1833 if (ajp->oldXmlPolicy) {
1834 ValNodeCopyStr (&head, 0, "\n");
1835 } else {
1836 ValNodeCopyStr (&head, 0, "\\n");
1837 }
1838 }
1839
1840 if (head == NULL) return NULL;
1841
1842 str = MergeFFValNodeStrs (head);
1843 ValNodeFreeData (head);
1844
1845 return str;
1846 }
1847
/* Counts the characters in str that are newline, carriage return, tab,  */
/* tilde or backslash.  Returns 0 for a NULL string.                      */
static size_t CountSlashableChars (
  CharPtr str
)

{
  CharPtr  p;
  size_t   total = 0;

  if (str == NULL) return 0;

  for (p = str; *p != '\0'; p++) {
    switch (*p) {
      case '\n' :
      case '\r' :
      case '\t' :
      case '~' :
      case '\\' :
        total++;
        break;
      default :
        break;
    }
  }

  return total;
}
1869
/* Appends the comment text in str to gbseq->comment.                     */
/*                                                                         */
/*   str          - comment text; a leading 12-column "COMMENT" label is  */
/*                  skipped; str itself is not modified                   */
/*   compress     - when TRUE, Asn2gnbkCompressSpaces is run on the copy  */
/*   protectSlash - when TRUE (and not oldXmlPolicy), each backslash is   */
/*                  doubled                                               */
/*                                                                         */
/* Newlines, carriage returns and tabs are first turned into blanks.      */
/* Unless ajp->oldXmlPolicy is set, each tilde then becomes the two       */
/* characters backslash-n (blanks directly after it are kept) and every   */
/* other run of blanks is reduced to a single blank.                      */
/*                                                                         */
/* An existing gbseq->comment is extended using "; " (oldXmlPolicy) or    */
/* backslash-r as separator.  Nothing happens for NULL arguments, text-   */
/* less input, or when memory cannot be obtained.                         */
static void CatenateCommentInGbseq (
  IntAsn2gbJobPtr ajp,
  GBSeqPtr gbseq,
  CharPtr str,
  Boolean compress,
  Boolean protectSlash
)

{
  Char     ch;
  CharPtr  cpy, dst, ptr, src, tmp;

  if (ajp == NULL || gbseq == NULL || StringHasNoText (str)) return;

  /* the label is "COMMENT" padded with blanks to the 12-column indent; */
  /* the literal has to be exactly as long as the count that is skipped */
  if (StringNCmp (str, "COMMENT     ", 12) == 0) {
    str += 12;
  }

  cpy = StringSave (str);
  if (cpy == NULL) return;

  for (ptr = cpy; *ptr != '\0'; ptr++) {
    ch = *ptr;
    if (ch == '\n' || ch == '\r' || ch == '\t') {
      *ptr = ' ';
    }
  }

  if (compress) {
    Asn2gnbkCompressSpaces (cpy);
  }

  if (! ajp->oldXmlPolicy) {
    /* every tilde or backslash can grow by one character */
    tmp = (CharPtr) MemNew (StringLen (cpy) + CountSlashableChars (cpy) + 10);
    if (tmp == NULL) {
      MemFree (cpy);
      return;
    }

    dst = tmp;
    src = cpy;
    ch = *src;
    while (ch != '\0') {
      if (ch == '~') {
        /* tilde -> backslash-n, blanks right behind it are preserved */
        *dst = '\\';
        dst++;
        *dst = 'n';
        dst++;
        src++;
        ch = *src;
        while (ch == ' ') {
          *dst = ch;
          dst++;
          src++;
          ch = *src;
        }
      } else if (ch == ' ') {
        /* any other run of blanks collapses to one */
        *dst = ch;
        dst++;
        src++;
        ch = *src;
        while (ch == ' ') {
          src++;
          ch = *src;
        }
      } else if (ch == '\\' && protectSlash) {
        *dst = '\\';
        dst++;
        *dst = '\\';
        dst++;
        src++;
        ch = *src;
      } else {
        *dst = ch;
        dst++;
        src++;
        ch = *src;
      }
    }
    *dst = '\0';

    MemFree (cpy);
    cpy = tmp;
  }

  if (gbseq->comment == NULL) {
    /* ownership of the copy moves to gbseq */
    gbseq->comment = cpy;
  } else {
    tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (cpy) + 10);
    if (tmp == NULL) {
      MemFree (cpy);
      return;
    }
    StringCpy (tmp, gbseq->comment);
    if (ajp->oldXmlPolicy) {
      StringCat (tmp, "; ");
    } else {
      StringCat (tmp, "\\r");
    }
    StringCat (tmp, cpy);
    MemFree (cpy);
    gbseq->comment = MemFree (gbseq->comment);
    gbseq->comment = tmp;
  }
}
1972
/* Applies a fixed list of literal find/replace corrections to *str via  */
/* FindReplaceString, in the order listed.  All but one entry remove a   */
/* trailing double tilde after a specific phrase or URL.  The whole body */
/* is compiled out when OS_MSWIN is defined.                              */
static void CommentTildes (
  CharPtr PNTR str
)

{
#ifndef OS_MSWIN
  /* consecutive pairs: text to find, replacement; NULL ends the list */
  static CharPtr fixes [] = {
    "nnotated by GenomeRefine~~",
    "nnotated by GenomeRefine",
    "based on SOLiD3 (Applied Biosystems)~~",
    "based on SOLiD3 (Applied Biosystems)",
    "Biological resourse center, NITE (NRBC)~~",
    "Biological resourse center, NITE (NRBC)",
    "developmental01.html~~",
    "developmental01.html",
    "http://bionano.toyo.ac.jp/~~",
    "http://bionano.toyo.ac.jp/",
    "http://dictycdb1.biol.tsukuba.ac.jp/acytodb/~~",
    "http://dictycdb1.biol.tsukuba.ac.jp/acytodb/",
    "http://egg.umh.es~~",
    "http://egg.umh.es",
    "http://www.aist.go.jp/~~",
    "http://www.aist.go.jp/",
    "http://www.bio.nite.go.jp/~~DOGAN ; Database",
    "http://www.bio.nite.go.jp/\n \nDOGAN ; Database",
    "http://www.bio.nite.go.jp/ngac/e/~~",
    "http://www.bio.nite.go.jp/ngac/e/",
    "http://www.brs.kyushu-u.ac.jp/~fcmic/~~",
    "http://www.brs.kyushu-u.ac.jp/~fcmic/",
    "http://www.miyazaki-u.ac.jp/ir/english/index.html~~",
    "http://www.miyazaki-u.ac.jp/ir/english/index.html",
    "URL:http://www.bio.nite.go.jp/~~",
    "URL:http://www.bio.nite.go.jp/",
    "RAST version 2.0 (http://rast.nmpdr.org/)~~",
    "RAST version 2.0 (http://rast.nmpdr.org/)",
    "URL:http://www.tmd.ac.jp/grad/bac/database.html~~",
    "URL:http://www.tmd.ac.jp/grad/bac/database.html",
    NULL,
    NULL
  };
  Int2  i;

  for (i = 0; fixes [i] != NULL; i += 2) {
    FindReplaceString (str, fixes [i], fixes [i + 1], FALSE, FALSE);
  }
#endif
}
1996
FormatCommentBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)1997 NLM_EXTERN CharPtr FormatCommentBlock (
1998 Asn2gbFormatPtr afp,
1999 BaseBlockPtr bbp
2000 )
2001
2002 {
2003 Boolean add_period;
2004 IntAsn2gbJobPtr ajp;
2005 Asn2gbSectPtr asp;
2006 Boolean as_string = FALSE;
2007 Boolean blank_before = FALSE;
2008 CommentBlockPtr cbp;
2009 Char ch;
2010 SeqMgrDescContext dcontext;
2011 CharPtr db;
2012 DbtagPtr dbt;
2013 Boolean do_gbseq = TRUE;
2014 SeqMgrFeatContext fcontext;
2015 GBSeqPtr gbseq;
2016 size_t len;
2017 ObjectIdPtr oip;
2018 CharPtr prefix;
2019 SeqDescrPtr sdp;
2020 SeqFeatPtr sfp;
2021 Char sfx [32];
2022 CharPtr str;
2023 CharPtr struc_comm_title = NULL;
2024 CharPtr suffix;
2025 CharPtr title;
2026 UserObjectPtr uop = NULL;
2027 StringItemPtr ffstring;
2028
2029 if (afp == NULL || bbp == NULL) return NULL;
2030 ajp = afp->ajp;
2031 if (ajp == NULL) return NULL;
2032 asp = afp->asp;
2033 if (asp == NULL) return NULL;
2034
2035 cbp = (CommentBlockPtr) bbp;
2036
2037 /* optionally populate gbseq for XML-ized GenBank format */
2038
2039 if (ajp->gbseq) {
2040 gbseq = &asp->gbseq;
2041 } else {
2042 gbseq = NULL;
2043 }
2044
2045 /* some comments are allocated (along with possible first COMMENT label) */
2046
2047 if (! StringHasNoText (bbp->string)) {
2048 str = StringSave (bbp->string);
2049 CatenateCommentInGbseq (ajp, gbseq, str, TRUE, FALSE);
2050 return str;
2051 }
2052
2053 title = NULL;
2054 prefix = NULL;
2055 suffix = NULL;
2056 add_period = FALSE;
2057 sfx [0] = '\0';
2058
2059 if (bbp->itemtype == OBJ_SEQDESC) {
2060
2061 /* usually should reference comment, maploc, or region descriptor IDs */
2062
2063 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
2064 if (sdp != NULL) {
2065
2066 if (dcontext.seqdesctype == Seq_descr_comment) {
2067
2068 title = (CharPtr) sdp->data.ptrvalue;
2069
2070 } else if (dcontext.seqdesctype == Seq_descr_maploc) {
2071
2072 dbt = (DbtagPtr) sdp->data.ptrvalue;
2073 if (dbt != NULL) {
2074 db = dbt->db;
2075 oip = dbt->tag;
2076 if (oip != NULL) {
2077 if (oip->str != NULL) {
2078
2079 title = oip->str;
2080 prefix = ("Map location: ");
2081
2082 } else if (db != NULL && oip->id != 0) {
2083
2084 title = db;
2085 prefix = ("Map location: (Database ");
2086 sprintf (sfx, "; id # %ld).", (long) oip->id);
2087 suffix = sfx;
2088
2089 }
2090 }
2091 }
2092
2093 } else if (dcontext.seqdesctype == Seq_descr_region) {
2094
2095 title = (CharPtr) sdp->data.ptrvalue;
2096 prefix = "Region: ";
2097
2098 } else if (dcontext.seqdesctype == Seq_descr_name) {
2099
2100 title = (CharPtr) sdp->data.ptrvalue;
2101 prefix = "Name: ";
2102
2103 } else if (dcontext.seqdesctype == Seq_descr_user) {
2104
2105 uop = (UserObjectPtr) sdp->data.ptrvalue;
2106 if (uop != NULL) {
2107 title = GetStrForStructuredComment (ajp, uop);
2108 if (title != NULL) {
2109 struc_comm_title = title;
2110 str = GetStructuredCommentTable (ajp, uop);
2111 CatenateCommentInGbseq (ajp, gbseq, str, TRUE, FALSE);
2112 MemFree (str);
2113 blank_before = TRUE;
2114 as_string = TRUE;
2115 do_gbseq = FALSE;
2116 }
2117 }
2118
2119 }
2120 }
2121
2122 } else if (bbp->itemtype == OBJ_SEQFEAT) {
2123
2124 /* also have to deal with comment feature across entire sequence */
2125
2126 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
2127 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_COMMENT) {
2128
2129 title = sfp->comment;
2130 }
2131 }
2132
2133 if (title == NULL) return NULL;
2134
2135 ffstring = FFGetString(ajp);
2136 if ( ffstring == NULL ) return NULL;
2137
2138 if (cbp->first) {
2139 FFStartPrint (ffstring, afp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
2140 } else {
2141 FFStartPrint (ffstring, afp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
2142 if (blank_before) {
2143 if (! cbp->no_blank_before) {
2144 FFAddOneString (ffstring, "\n", FALSE, FALSE, TILDE_EXPAND);
2145 }
2146 }
2147 }
2148
2149 str = StringSave (title);
2150
2151 if (StringDoesHaveText (str)) {
2152 CommentTildes (&str);
2153 }
2154
2155 TrimSpacesAndJunkFromEnds (str, TRUE);
2156
2157 /* remove trailing double tilde */
2158 /*
2159 len = StringLen (str);
2160 if (len > 5 && str [len-1] == '~' && str [len-2] == '~') {
2161 str [len-2] = '\0';
2162 }
2163 */
2164
2165 if (as_string) {
2166 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
2167 } else {
2168 if (! IsEllipsis (str)) {
2169 s_RemovePeriodFromEnd (str);
2170 len = StringLen (str);
2171 if (len > 0) {
2172 ch = str [len - 1];
2173 if (ch != '.' && ch != '/' && ch != '~') {
2174 add_period = TRUE;
2175 }
2176 }
2177 }
2178 AddCommentWithURLlinks(ajp, ffstring, prefix, str, suffix);
2179 if (add_period) {
2180 FFAddOneChar (ffstring, '.', FALSE);
2181 }
2182 }
2183
2184 MemFree (str);
2185
2186 str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 5, 5, "CC");
2187
2188 if (do_gbseq) {
2189 CatenateCommentInGbseq (ajp, gbseq, title, ajp->oldXmlPolicy, TRUE);
2190 }
2191
2192 FFRecycleString(ajp, ffstring);
2193
2194 MemFree (struc_comm_title);
2195
2196 return str;
2197 }
2198
2199 /* format features section */
2200
is_real_id(SeqIdPtr sip,SeqIdPtr this_sip)2201 static Boolean is_real_id (
2202 SeqIdPtr sip,
2203 SeqIdPtr this_sip
2204 )
2205
2206 {
2207 BioseqPtr bsp;
2208
2209 if (sip == NULL || this_sip == NULL) return FALSE;
2210
2211 if (! SeqIdIn (sip, this_sip)) {
2212 bsp = BioseqFind (sip);
2213 if (bsp == NULL) return TRUE; /* ??? */
2214 if (bsp->repr == Seq_repr_virtual) return FALSE;
2215 }
2216
2217 return TRUE;
2218 }
2219
FlatVirtLoc(BioseqPtr bsp,SeqLocPtr location)2220 static Boolean FlatVirtLoc (
2221 BioseqPtr bsp,
2222 SeqLocPtr location
2223 )
2224
2225 {
2226 SeqIntPtr sintp;
2227 SeqIdPtr sip;
2228 SeqPntPtr spp;
2229
2230 if (bsp == NULL || location == NULL) return FALSE;
2231
2232 switch (location->choice) {
2233 case SEQLOC_WHOLE :
2234 sip = (SeqIdPtr) location->data.ptrvalue;
2235 if (sip == NULL) return TRUE;
2236 if (! is_real_id (sip, bsp->id)) return TRUE;
2237 break;
2238 case SEQLOC_INT :
2239 sintp = (SeqIntPtr) location->data.ptrvalue;
2240 if (sintp == NULL) return TRUE;
2241 sip = sintp->id;
2242 if (sip == NULL) return TRUE;
2243 if (! is_real_id (sip, bsp->id)) return TRUE;
2244 break;
2245 case SEQLOC_PNT :
2246 spp = (SeqPntPtr) location->data.ptrvalue;
2247 if (spp == NULL) return TRUE;
2248 sip = spp->id;
2249 if (sip == NULL) return TRUE;
2250 if (! is_real_id (sip, bsp->id)) return TRUE;
2251 break;
2252 default :
2253 break;
2254 }
2255
2256 return FALSE;
2257 }
2258
2259 static Uint1 id_order [NUM_SEQID];
2260 static Boolean order_initialized = FALSE;
2261
2262 static CharPtr lim_str [5] = { "", ">","<", ">", "<" };
2263
GetAccnVerFromServer(BIG_ID gi,CharPtr buf)2264 NLM_EXTERN Boolean GetAccnVerFromServer (BIG_ID gi, CharPtr buf)
2265
2266 {
2267 AccnVerLookupFunc func;
2268 SeqMgrPtr smp;
2269 CharPtr str;
2270
2271 if (buf == NULL) return FALSE;
2272 *buf = '\0';
2273 smp = SeqMgrWriteLock ();
2274 if (smp == NULL) return FALSE;
2275 func = smp->accn_ver_lookup_func;
2276 SeqMgrUnlock ();
2277 if (func == NULL) return FALSE;
2278 str = (*func) (gi);
2279 if (str == NULL) return FALSE;
2280 if (StringLen (str) < 40) {
2281 StringCpy (buf, str);
2282 }
2283 MemFree (str);
2284 return TRUE;
2285 }
2286
2287
2288 /******************************************************************************/
2289 /* FFFlatLoc functions . */
2290 /******************************************************************************/
2291
FF_FlatNullAhead(BioseqPtr bsp,ValNodePtr location)2292 static Boolean FF_FlatNullAhead (
2293 BioseqPtr bsp,
2294 ValNodePtr location
2295 )
2296
2297 {
2298 SeqLocPtr next;
2299
2300 if (bsp == NULL || location == NULL) return FALSE;
2301
2302 next = location->next;
2303 if (next == NULL) return TRUE;
2304 if (next->choice == SEQLOC_NULL) return TRUE;
2305 if (FlatVirtLoc (bsp, next)) return TRUE;
2306
2307 return FALSE;
2308 }
2309
2310
2311
/* Appends "<id>:" for the given Seq-id to ffstring, as used in front of */
/* a location on another sequence.                                        */
/*                                                                         */
/* The id to print is chosen as follows:                                  */
/*   - bioseq already in memory: best id per SeqIdSelect / id_order       */
/*   - gi: text from GetAccnVerFromServer (printed as is), otherwise the  */
/*     id from GetSeqIdForGI                                              */
/*   - otherwise the bioseq is locked by id and its best id is used       */
/*   - failing all that, sip itself is written                            */
/* If nothing printable results, "?00000" is used (or up to 12            */
/* characters of the local id string).                                    */
/*                                                                         */
/* ajp->relModeError is set when a gi had to be printed or the fallback   */
/* text was used; ajp may be NULL, in which case no flag is recorded.     */
/* Nothing is done when ffstring or sip is NULL.                          */
static void FlatLocSeqId (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip
)

{
  BioseqPtr    bsp;
  Char         buf [40];
  Boolean      locked = FALSE;
  ObjectIdPtr  oip;
  Boolean      rel_mode_error = FALSE;
  SeqIdPtr     use_id = NULL;

  if (ffstring == NULL || sip == NULL) return;

  buf [0] = '\0';
  bsp = BioseqFind (sip);
  if (bsp != NULL) {
    use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
  } else if (sip->choice == SEQID_GI) {
    if (GetAccnVerFromServer (sip->data.intvalue, buf)) {
      FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
      return;
    }
    use_id = GetSeqIdForGI (sip->data.intvalue);
  }

  if (use_id == NULL && bsp == NULL) {
    bsp = BioseqLockById (sip);
    if (bsp != NULL) {
      /* remember the lock only if one was really obtained */
      locked = TRUE;
      use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
    }
  }

  if (use_id != NULL) {
    SeqIdWrite (use_id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
    if (use_id->choice == SEQID_GI) {
      rel_mode_error = TRUE;
    }
  } else if (sip->choice == SEQID_GI) {
    SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
    rel_mode_error = TRUE;
  } else {
    SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
  }

  if (StringHasNoText (buf)) {
    StringCpy (buf, "?00000");
    rel_mode_error = TRUE;
    if (use_id != NULL && use_id->choice == SEQID_LOCAL) {
      oip = (ObjectIdPtr) use_id->data.ptrvalue;
      if (oip != NULL && (! StringHasNoText (oip->str))) {
        StringNCpy_0 (buf, oip->str, 13);
      }
    }
  }

  /* use_id may point into the locked bioseq, so the lock is released */
  /* only after its last use above                                     */
  if (locked) {
    BioseqUnlock (bsp);
  }

  if (rel_mode_error && ajp != NULL) {
    ajp->relModeError = TRUE;
  }

  FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
}
2375
2376
2377
/* Appends the text for a single position to ffstring, preceded by       */
/* "<id>:" (via FlatLocSeqId) when sip is given and is not among         */
/* this_sip.  point is zero-based on input and printed one-based.        */
/*                                                                         */
/* The fuzz, if any, selects the notation by fuzz->choice:                */
/*   1  (p-a.p)..(p.p+a)                                                  */
/*   2  1+b ^ 1+a                                                         */
/*   3  p - p*a/1000 ^ p + p*a/1000                                       */
/*   4  a == 3: p^p+1 (space to right); a == 4 and p > 1: p-1^p (space    */
/*      to left); otherwise the lim_str entry for a, followed by p        */
/*   any other choice, or no fuzz: the plain position                     */
/* Nothing is done when ffstring is NULL.                                 */
static void FlatLocCaret (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip,
  SeqIdPtr this_sip,
  Int4 point,
  IntFuzzPtr fuzz
)

{
  Char   buf [128];
  Uint1  index;

  if (ffstring == NULL) return;

  if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
    FlatLocSeqId (ajp, ffstring, sip);
  }

  buf [0] = '\0';
  point++; /* original FlatLocHalfCaret was called with point + 1 */

  if (fuzz != NULL) {
    switch (fuzz->choice) {
      case 1 :
        sprintf (buf, "(%ld.%ld)..(%ld.%ld)",
                 (long) (point - fuzz->a),
                 (long) point,
                 (long) point,
                 (long) (point + fuzz->a));
        break;
      case 2 :
        sprintf (buf, "%ld^%ld",
                 (long) (1 + fuzz->b),
                 (long) (1 + fuzz->a));
        break;
      case 3 :
        sprintf (buf, "%ld^%ld",
                 (long) (point - point * ((double) fuzz->a / 1000.0)),
                 (long) (point + point * ((double) fuzz->a / 1000.0)));
        break;
      case 4 :
        if (fuzz->a == 3) { /* space to right */
          sprintf (buf, "%ld^%ld", (long) (point), (long) (point + 1));
        } else if (fuzz->a == 4 && point > 1) { /* space to left */
          sprintf (buf, "%ld^%ld", (long) (point - 1), (long) point);
        } else {
          /* range-check before narrowing to Uint1, so that an        */
          /* out-of-range value cannot wrap around onto a valid entry */
          index = 0;
          if (fuzz->a >= 0 && fuzz->a <= 4) {
            index = (Uint1) fuzz->a;
          }
          sprintf (buf, "%s%ld", lim_str [index], (long) point);
        }
        break;
      default :
        sprintf (buf, "%ld", (long) point);
        break;
    }
  } else {
    sprintf (buf, "%ld", (long) point);
  }

  FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
}
2442
2443
/*
 * FlatLocPoint
 *
 * Appends the text for one end point of a location to ffstring.  The
 * incoming point is incremented by one before printing.  When sip is
 * supplied and SeqIdIn does not find it in this_sip, FlatLocSeqId is
 * called first.
 *
 * Output by fuzz->choice (p is the incremented point, a/b the fuzz values):
 *   1      "(p-a.p+a)"
 *   2      "(b+1.a+1)"
 *   3      "(lo.hi)" with lo/hi = p -/+ p * a / 1000
 *   4      lim_str [a] followed by p (a > 4 uses entry 0)
 *   other  bare p, which is also what a NULL fuzz produces
 */
static void FlatLocPoint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqIdPtr sip,
  SeqIdPtr this_sip,
  Int4 point,
  IntFuzzPtr fuzz
)

{
  Uint1  lim;
  Int4   pos;
  Char   text [128];

  if (ffstring == NULL) return;

  if (sip != NULL) {
    if (! SeqIdIn (sip, this_sip)) {
      FlatLocSeqId (ajp, ffstring, sip);
    }
  }

  text [0] = '\0';
  pos = point + 1;

  if (fuzz == NULL) {
    sprintf (text, "%ld", (long) pos);
  } else if (fuzz->choice == 1) {
    sprintf (text, "(%ld.%ld)",
             (long) (pos - fuzz->a),
             (long) (pos + fuzz->a));
  } else if (fuzz->choice == 2) {
    sprintf (text, "(%ld.%ld)",
             (long) (1 + fuzz->b),
             (long) (1 + fuzz->a));
  } else if (fuzz->choice == 3) {
    sprintf (text, "(%ld.%ld)",
             (long) (pos - pos * ((double) fuzz->a / 1000.0)),
             (long) (pos + pos * ((double) fuzz->a / 1000.0)));
  } else if (fuzz->choice == 4) {
    lim = (Uint1) fuzz->a;
    if (lim > 4) {
      lim = 0;
    }
    sprintf (text, "%s%ld", lim_str [lim], (long) pos);
  } else {
    sprintf (text, "%ld", (long) pos);
  }

  FFAddOneString (ffstring, text, FALSE, FALSE, TILDE_IGNORE);
}
2500
2501
/*
 * FlatLocElement
 *
 * Appends the text for one simple location component (whole, interval,
 * point or bond) to ffstring.  Whole, interval and point components are
 * printed only when is_real_id accepts their id against bsp->id; minus
 * strand intervals and points are wrapped in "complement(...)".  Any
 * other choice is silently ignored.
 *
 * isGap forces the ".." and stop coordinate of an interval to be printed
 * even when the start/stop test below would suppress them.
 */
static void FlatLocElement (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean isGap

)

{
  SeqBondPtr  bond;
  SeqIdPtr    id;
  SeqIntPtr   ivl;
  SeqPntPtr   pnt;
  Boolean     reversed = FALSE;
  Boolean     show_stop;

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  switch (location->choice) {

    case SEQLOC_WHOLE :
      id = (SeqIdPtr) location->data.ptrvalue;
      if (id == NULL) break;
      /* the lookup is only an existence check */
      if (BioseqFind (id) == NULL) break;
      if (! is_real_id (id, bsp->id)) break;
      /* NOTE(review): the stop coordinate comes from bsp->length, not from */
      /* the bioseq found for id - confirm this is intended */
      FlatLocPoint (ajp, ffstring, id, bsp->id, 0, NULL);
      if (bsp->length > 0) {
        FFAddOneString (ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
        FlatLocPoint (ajp, ffstring, NULL, bsp->id, bsp->length - 1, NULL);
      }
      break;

    case SEQLOC_INT :
      ivl = (SeqIntPtr) location->data.ptrvalue;
      if (ivl == NULL || ivl->id == NULL) break;
      id = ivl->id;
      if (! is_real_id (id, bsp->id)) break;
      reversed = (Boolean) (ivl->strand == Seq_strand_minus);
      if (reversed) {
        FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
      }
      FlatLocPoint (ajp, ffstring, id, bsp->id, ivl->from, ivl->if_from);
      /* stop is shown for gaps, or for a positive stop that differs from */
      /* the start or has fuzz on either end */
      show_stop = isGap;
      if (! show_stop && ivl->to > 0) {
        show_stop = (Boolean) (ivl->to != ivl->from ||
                               ivl->if_from != NULL ||
                               ivl->if_to != NULL);
      }
      if (show_stop) {
        FFAddOneString (ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
        FlatLocPoint (ajp, ffstring, NULL, bsp->id, ivl->to, ivl->if_to);
      }
      if (reversed) {
        FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
      }
      break;

    case SEQLOC_PNT :
      pnt = (SeqPntPtr) location->data.ptrvalue;
      if (pnt == NULL || pnt->id == NULL) break;
      id = pnt->id;
      if (! is_real_id (id, bsp->id)) break;
      reversed = (Boolean) (pnt->strand == Seq_strand_minus);
      if (reversed) {
        FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
      }
      if (pnt->fuzz == NULL) {
        FlatLocPoint (ajp, ffstring, id, bsp->id, pnt->point, NULL);
      } else {
        FlatLocCaret (ajp, ffstring, id, bsp->id, pnt->point, pnt->fuzz);
      }
      if (reversed) {
        FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
      }
      break;

    case SEQLOC_BOND :
      bond = (SeqBondPtr) location->data.ptrvalue;
      if (bond == NULL) break;
      pnt = bond->a;
      if (pnt == NULL || pnt->id == NULL) break;
      id = pnt->id;
      FFAddOneString (ffstring, "bond(", FALSE, FALSE, TILDE_IGNORE);
      FlatLocPoint (ajp, ffstring, id, bsp->id, pnt->point, pnt->fuzz);
      pnt = bond->b;
      if (pnt != NULL) {
        FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
        FlatLocPoint (ajp, ffstring, NULL, bsp->id, pnt->point, pnt->fuzz);
      }
      FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
      break;

    default :
      /* unexpected internal complex type or unimplemented SEQLOC_FEAT */
      break;
  }
}
2600
2601
2602
/*
 * FF_FlatPackedPoint
 *
 * Prints each of the first pspp->used points of a packed point through
 * FlatLocPoint, all sharing the packed point's id and fuzz.  No separator
 * is written between points.  isGap is accepted for signature parity with
 * the other location printers and is not used here.
 */
static void FF_FlatPackedPoint (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  PackSeqPntPtr pspp,
  BioseqPtr bsp,
  Boolean isGap
)

{
  Uint1  i = 0;

  if (ffstring == NULL || pspp == NULL || bsp == NULL) return;

  while (i < pspp->used) {
    FlatLocPoint (ajp, ffstring, pspp->id, bsp->id, pspp->pnts [i], pspp->fuzz);
    i++;
  }
}
2620
2621
2622 static void FF_DoFlatLoc (
2623 IntAsn2gbJobPtr ajp,
2624 StringItemPtr ffstring,
2625 BioseqPtr bsp,
2626 SeqLocPtr location,
2627 Boolean ok_to_complement,
2628 Boolean isGap
2629 );
2630
/*
 * FF_GroupFlatLoc
 *
 * Prints a compound location as "prefix item,item,...)".  prefix must
 * already contain the opening parenthesis (for example "join(").  The
 * children of location are printed in order, separated by commas; NULL
 * and virtual children (per FlatVirtLoc) are skipped.
 *
 * When is_flat_order is set, an interval that FF_FlatNullAhead rejects
 * opens a nested "join(".  That nested group is closed at the next
 * skipped child that is not the last in the chain, and every group still
 * open is closed at the end.
 */
static void FF_GroupFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  CharPtr prefix,
  Boolean is_flat_order,
  Boolean isGap
)

{
  SeqLocPtr      child;
  Boolean        in_join = FALSE;   /* a nested "join(" is currently open */
  Int2           open_parens = 1;   /* the one supplied by prefix */
  PackSeqPntPtr  packed;
  Boolean        printed_any = FALSE;
  SeqIdPtr       saved_next;        /* holds child->next while it is detached */

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  /* prefix will have the first parenthesis */

  FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);

  for (child = (SeqLocPtr) location->data.ptrvalue; child != NULL; child = child->next) {

    if (child->choice == SEQLOC_NULL || FlatVirtLoc (bsp, child)) {
      if (child != location && child->next != NULL && in_join) {
        in_join = FALSE;
        FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
        open_parens--;
      }
      continue;
    }

    /* comma before every printable component after the first */

    if (printed_any) {
      if (child->choice != SEQLOC_EMPTY && child->choice != SEQLOC_NULL) {
        FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
      }
    }

    switch (child->choice) {

      case SEQLOC_WHOLE :
      case SEQLOC_PNT :
      case SEQLOC_BOND :
      case SEQLOC_FEAT :
        printed_any = TRUE;
        if (! FlatVirtLoc (bsp, child)) {
          FlatLocElement (ajp, ffstring, bsp, child, isGap);
        } else if (child != location && child->next != NULL && in_join) {
          in_join = FALSE;
          FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
          open_parens--;
        }
        break;

      case SEQLOC_INT :
        printed_any = TRUE;
        if (is_flat_order) {
          if (! FF_FlatNullAhead (bsp, child)) {
            in_join = TRUE;
            FFAddOneString (ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
            open_parens++;
          }
        }
        FlatLocElement (ajp, ffstring, bsp, child, isGap);
        break;

      case SEQLOC_PACKED_PNT :
        printed_any = TRUE;
        packed = (PackSeqPntPtr) child->data.ptrvalue;
        if (packed != NULL) {
          FF_FlatPackedPoint (ajp, ffstring, packed, bsp, isGap);
        }
        break;

      case SEQLOC_PACKED_INT :
      case SEQLOC_MIX :
      case SEQLOC_EQUIV :
        printed_any = TRUE;
        /* detach the child so the recursive call prints only this one */
        saved_next = child->next;
        child->next = NULL;
        FF_DoFlatLoc (ajp, ffstring, bsp, child, FALSE, isGap);
        child->next = saved_next;
        break;

      default :
        break;
    }

  }

  /* close every group still open, including the one from prefix */

  for (; open_parens > 0; open_parens--) {
    FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
  }
}
2726
2727
2728
2729
/*
 * FF_DoFlatLoc
 *
 * Prints the chain of locations starting at location into ffstring,
 * separating components with commas and skipping NULL and virtual ones.
 *
 * When ok_to_complement is set and the whole location is on the minus
 * strand, a reverse-complemented copy is printed inside "complement(...)"
 * instead.  If the copy cannot be made nothing is printed.
 *
 * Mix and packed-interval components become "order(...)" when any child
 * is NULL or virtual and "join(...)" otherwise; equiv components become
 * "one-of(...)".
 */
static void FF_DoFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement,
  Boolean isGap

)

{
  SeqLocPtr      child;
  SeqLocPtr      flipped;
  Boolean        has_gap;
  PackSeqPntPtr  packed;
  SeqLocPtr      part;

  if (ffstring == NULL || bsp == NULL || location == NULL) return;

  /* deal with complement of entire location */

  if (ok_to_complement && SeqLocStrand (location) == Seq_strand_minus) {
    flipped = AsnIoMemCopy ((Pointer) location,
                            (AsnReadFunc) SeqLocAsnRead,
                            (AsnWriteFunc) SeqLocAsnWrite);
    if (flipped != NULL) {
      SeqLocRevCmp (flipped);
      FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
      FF_DoFlatLoc (ajp, ffstring, bsp, flipped, FALSE, isGap);
      FFAddOneString (ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
    }
    SeqLocFree (flipped);
    return;
  }

  /* handle each location component */

  for (part = location; part != NULL; part = part->next) {

    if (part->choice == SEQLOC_NULL || FlatVirtLoc (bsp, part)) continue;

    /* print comma between components */

    if (part != location) {
      FFAddOneString (ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
    }

    if (part->choice == SEQLOC_MIX || part->choice == SEQLOC_PACKED_INT) {

      /* any NULL or virtual child turns the group into order() */

      has_gap = FALSE;
      child = (SeqLocPtr) part->data.ptrvalue;
      while (child != NULL) {
        if (child->choice == SEQLOC_NULL || FlatVirtLoc (bsp, child)) {
          has_gap = TRUE;
        }
        child = child->next;
      }
      if (has_gap) {
        FF_GroupFlatLoc (ajp, ffstring, bsp, part, "order(", TRUE, isGap);
      } else {
        FF_GroupFlatLoc (ajp, ffstring, bsp, part, "join(", FALSE, isGap);
      }

    } else if (part->choice == SEQLOC_EQUIV) {

      FF_GroupFlatLoc (ajp, ffstring, bsp, part, "one-of(", FALSE, isGap);

    } else if (part->choice == SEQLOC_PACKED_PNT) {

      packed = (PackSeqPntPtr) part->data.ptrvalue;
      if (packed != NULL) {
        FF_FlatPackedPoint (ajp, ffstring, packed, bsp, isGap);
      }

    } else {

      FlatLocElement (ajp, ffstring, bsp, part, isGap);
    }

  }
}
2810
/*
 * FF_DoFlatLocEx
 *
 * Wrapper around FF_DoFlatLoc.  With swapPartials unset the location is
 * printed as is.  With swapPartials set, a copy of the location is made,
 * its 5' and 3' partial flags are exchanged, the copy is printed, and the
 * copy is freed.  Nothing is printed if the copy cannot be made.
 */
static void FF_DoFlatLocEx (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement,
  Boolean isGap,
  Boolean swapPartials
)

{
  SeqLocPtr  copy;
  Boolean    part3;
  Boolean    part5;

  if (location == NULL) return;

  if (swapPartials) {
    copy = (SeqLocPtr) AsnIoMemCopy ((Pointer) location,
                                     (AsnReadFunc) SeqLocAsnRead,
                                     (AsnWriteFunc) SeqLocAsnWrite);
    if (copy == NULL) return;
    CheckSeqLocForPartial (copy, &part5, &part3);
    /* arguments deliberately reversed to exchange the two flags */
    SetSeqLocPartial (copy, part3, part5);
    FF_DoFlatLoc (ajp, ffstring, bsp, copy, ok_to_complement, isGap);
    SeqLocFree (copy);
    return;
  }

  FF_DoFlatLoc (ajp, ffstring, bsp, location, ok_to_complement, isGap);
}
2839
2840
FFFlatLoc(IntAsn2gbJobPtr ajp,BioseqPtr bsp,SeqLocPtr location,Boolean masterStyle,Boolean isGap)2841 NLM_EXTERN CharPtr FFFlatLoc (
2842 IntAsn2gbJobPtr ajp,
2843 BioseqPtr bsp,
2844 SeqLocPtr location,
2845 Boolean masterStyle,
2846 Boolean isGap
2847 )
2848
2849 {
2850 Boolean hasNulls;
2851 IntFuzzPtr fuzz = NULL;
2852 SeqLocPtr loc;
2853 Boolean minus_strand = FALSE;
2854 Boolean noLeft;
2855 Boolean noRight;
2856 Uint1 num = 1;
2857 ValNodePtr partiallist = NULL, emptypartials = NULL;
2858 SeqPntPtr spp;
2859 CharPtr str;
2860 SeqLocPtr tmp;
2861 StringItemPtr ffstring = NULL;
2862
2863 if (ajp == NULL || bsp == NULL || location == NULL) return NULL;
2864
2865 ffstring = FFGetString(ajp);
2866
2867 if (! order_initialized) {
2868 id_order [SEQID_GENBANK] = num++;
2869 id_order [SEQID_EMBL] = num++;
2870 id_order [SEQID_DDBJ] = num++;
2871 id_order [SEQID_OTHER] = num++;
2872 id_order [SEQID_TPG] = num++;
2873 id_order [SEQID_TPE] = num++;
2874 id_order [SEQID_TPD] = num++;
2875 id_order [SEQID_GPIPE] = num++;
2876 id_order [SEQID_GIBBSQ] = num++;
2877 id_order [SEQID_GIBBMT] = num++;
2878 id_order [SEQID_PRF] = num++;
2879 id_order [SEQID_PDB] = num++;
2880 id_order [SEQID_PIR] = num++;
2881 id_order [SEQID_SWISSPROT] = num++;
2882 id_order [SEQID_PATENT] = num++;
2883 id_order [SEQID_GI] = num++;;
2884 id_order [SEQID_GENERAL] = num++;
2885 id_order [SEQID_LOCAL] = num++;
2886 id_order [SEQID_GIIM] = num++;
2887 order_initialized = TRUE;
2888 }
2889
2890 if (ajp->ajp.slp != NULL) {
2891 minus_strand = (Boolean) (SeqLocStrand (ajp->ajp.slp) == Seq_strand_minus);
2892 }
2893
2894 if (ajp->smallGenomeSet) {
2895 FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
2896 } else if (masterStyle) {
2897
2898 /* map location from parts to segmented bioseq */
2899
2900 if (location->choice == SEQLOC_PNT) {
2901 spp = (SeqPntPtr) location->data.ptrvalue;
2902 if (spp != NULL) {
2903 fuzz = spp->fuzz;
2904 }
2905 }
2906
2907 partiallist = GetSeqLocPartialSet (location);
2908 CheckSeqLocForPartial (location, &noLeft, &noRight);
2909 hasNulls = LocationHasNullsBetween (location);
2910 loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, ajp->relaxedMapping);
2911 if (loc == NULL) {
2912 tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2913 loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, ajp->relaxedMapping);
2914 SeqLocFree (tmp);
2915 }
2916 if (loc == NULL) {
2917 ValNodeFree (partiallist);
2918 return StringSave ("?");
2919 }
2920 emptypartials = GetSeqLocPartialSet (loc);
2921 FreeAllFuzz (loc);
2922 SetSeqLocPartial (loc, noLeft, noRight);
2923 if (ValNodeLen (partiallist) == ValNodeLen (emptypartials)) {
2924 SetSeqLocPartialSet (loc, partiallist);
2925 }
2926 ValNodeFree (partiallist);
2927 ValNodeFree (emptypartials);
2928
2929 if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2930 spp = (SeqPntPtr) loc->data.ptrvalue;
2931 if (spp != NULL && spp->fuzz == NULL) {
2932 spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2933 (AsnReadFunc) IntFuzzAsnRead,
2934 (AsnWriteFunc) IntFuzzAsnWrite);
2935 }
2936 }
2937
2938 FF_DoFlatLocEx (ajp, ffstring, bsp, loc, TRUE, isGap, minus_strand);
2939
2940 SeqLocFree (loc);
2941
2942 } else {
2943 FF_DoFlatLocEx (ajp, ffstring, bsp, location, TRUE, isGap, minus_strand);
2944 }
2945
2946 str = FFToCharPtr(ffstring);
2947 FFRecycleString(ajp, ffstring);
2948 return str;
2949 }
2950
2951
2952
2953
/*
 * PromoteSeqId
 *
 * Seq-id visitor callback: replaces the contents of sip in place with a
 * duplicate of the id passed through userdata, frees what sip held
 * before, and then calls SeqIdStripLocus on the result.  sip keeps its
 * position in whatever chain it belongs to.  Nothing is changed if the
 * duplicate or the scratch node cannot be allocated.
 */
static void PromoteSeqId (SeqIdPtr sip, Pointer userdata)

{
  SeqIdPtr  bestid, newid, oldid;

  if (sip == NULL) return;

  bestid = (SeqIdPtr) userdata;

  newid = SeqIdDup (bestid);
  if (newid == NULL) return;

  oldid = ValNodeNew (NULL);
  if (oldid == NULL) {
    /* the duplicate was never installed, so release it here */
    SeqIdFree (newid);
    return;
  }

  /* move the old contents into a detached node so they can be freed */

  MemCopy (oldid, sip, sizeof (ValNode));
  oldid->next = NULL;

  /* sip takes over the duplicate's payload */

  sip->choice = newid->choice;
  sip->data.ptrvalue = newid->data.ptrvalue;

  SeqIdFree (oldid);
  ValNodeFree (newid);  /* node only; its data now belongs to sip */

  SeqIdStripLocus (sip);
}
2978
SeqLocReMapEx(SeqIdPtr newid,SeqLocPtr seq_loc,SeqLocPtr location,Int4 offset,Boolean rev,Boolean masterStyle,Boolean relaxed)2979 NLM_EXTERN SeqLocPtr SeqLocReMapEx (
2980 SeqIdPtr newid,
2981 SeqLocPtr seq_loc,
2982 SeqLocPtr location,
2983 Int4 offset,
2984 Boolean rev,
2985 Boolean masterStyle,
2986 Boolean relaxed
2987 )
2988
2989 {
2990 BioseqPtr bsp;
2991 Boolean hasNulls;
2992 IntFuzzPtr fuzz = NULL;
2993 SeqLocPtr loc;
2994 Boolean noLeft;
2995 Boolean noRight;
2996 SeqEntryPtr scope;
2997 SeqIdPtr sip;
2998 SeqLocPtr slp = NULL;
2999 SeqPntPtr spp;
3000 SeqLocPtr tmp;
3001
3002 if (newid == NULL || seq_loc == NULL || location == NULL) return NULL;
3003
3004 if (masterStyle) {
3005
3006 sip = SeqLocId (seq_loc);
3007 if (sip == NULL) return NULL;
3008 bsp = BioseqFind (sip);
3009 if (bsp == NULL) {
3010 scope = SeqEntrySetScope (NULL);
3011 bsp = BioseqFind (sip);
3012 SeqEntrySetScope (scope);
3013 }
3014 if (bsp == NULL) return NULL;
3015 sip = SeqIdFindBest (bsp->id, 0);
3016
3017 /* map location from parts to segmented bioseq */
3018
3019 if (location->choice == SEQLOC_PNT) {
3020 spp = (SeqPntPtr) location->data.ptrvalue;
3021 if (spp != NULL) {
3022 fuzz = spp->fuzz;
3023 }
3024 }
3025
3026 CheckSeqLocForPartial (location, &noLeft, &noRight);
3027 hasNulls = LocationHasNullsBetween (location);
3028 loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE, relaxed);
3029 if (loc == NULL) {
3030 tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
3031 loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE, relaxed);
3032 SeqLocFree (tmp);
3033 }
3034 if (loc == NULL) {
3035 return NULL;
3036 }
3037 FreeAllFuzz (loc);
3038 SetSeqLocPartial (loc, noLeft, noRight);
3039
3040 if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
3041 spp = (SeqPntPtr) loc->data.ptrvalue;
3042 if (spp != NULL && spp->fuzz == NULL) {
3043 spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
3044 (AsnReadFunc) IntFuzzAsnRead,
3045 (AsnWriteFunc) IntFuzzAsnWrite);
3046 }
3047 }
3048
3049 scope = SeqEntrySetScope (NULL);
3050 slp = SeqLocReMap (newid, seq_loc, loc, offset, rev);
3051 SeqEntrySetScope (scope);
3052
3053 SeqLocFree (loc);
3054
3055 VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId);
3056 } else {
3057
3058 scope = SeqEntrySetScope (NULL);
3059 slp = SeqLocReMap (newid, seq_loc, location, offset, rev);
3060 SeqEntrySetScope (scope);
3061 }
3062
3063 return slp;
3064 }
3065
3066
3067 /******************************************************************************/
3068 /* End FFFlatLoc functions. */
3069 /******************************************************************************/
3070
3071
3072
/*
 * SubSourceToQualArray
 *
 * Walks the SubSource chain and records, for each source qualifier slot,
 * the first SubSource that maps to it.  Subtype 255 is folded onto table
 * position 44; subtypes of 45 and above, and table entries outside
 * 1 .. ASN2GNBK_TOTAL_SOURCE - 1, are ignored.
 */
static void SubSourceToQualArray (
  SubSourcePtr ssp,
  QualValPtr qvp
)

{
  Uint1       code;
  SourceType  slot;

  if (qvp == NULL) return;

  for (; ssp != NULL; ssp = ssp->next) {
    code = ssp->subtype;
    if (code == 255) {
      code = 44;
    }
    if (code >= 45) continue;

    slot = subSourceToSourceIdx [code];
    if (slot <= 0 || slot >= ASN2GNBK_TOTAL_SOURCE) continue;

    /* first occurrence wins */
    if (qvp [slot].ssp == NULL) {
      qvp [slot].ssp = ssp;
    }
  }
}
3100
3101 NLM_EXTERN SourceType orgModToSourceIdx [42] = {
3102 SCQUAL_zero_orgmod,
3103 SCQUAL_one_orgmod,
3104 SCQUAL_strain,
3105 SCQUAL_sub_strain,
3106 SCQUAL_type,
3107 SCQUAL_sub_type,
3108 SCQUAL_variety,
3109 SCQUAL_serotype,
3110 SCQUAL_serogroup,
3111 SCQUAL_serovar,
3112 SCQUAL_cultivar,
3113 SCQUAL_pathovar,
3114 SCQUAL_chemovar,
3115 SCQUAL_biovar,
3116 SCQUAL_biotype,
3117 SCQUAL_group,
3118 SCQUAL_sub_group,
3119 SCQUAL_isolate,
3120 SCQUAL_common,
3121 SCQUAL_acronym,
3122 SCQUAL_dosage,
3123 SCQUAL_spec_or_nat_host,
3124 SCQUAL_sub_species,
3125 SCQUAL_specimen_voucher,
3126 SCQUAL_authority,
3127 SCQUAL_forma,
3128 SCQUAL_forma_specialis,
3129 SCQUAL_ecotype,
3130 SCQUAL_synonym,
3131 SCQUAL_anamorph,
3132 SCQUAL_teleomorph,
3133 SCQUAL_breed,
3134 SCQUAL_gb_acronym,
3135 SCQUAL_gb_anamorph,
3136 SCQUAL_gb_synonym,
3137 SCQUAL_culture_collection,
3138 SCQUAL_bio_material,
3139 SCQUAL_metagenome_source,
3140 SCQUAL_type_material,
3141 SCQUAL_old_lineage,
3142 SCQUAL_old_name,
3143 SCQUAL_orgmod_note
3144 };
3145
/*
 * OrgModToQualArray
 *
 * Walks the OrgMod chain and records, for each source qualifier slot,
 * the first OrgMod that maps to it through orgModToSourceIdx.  Subtypes
 * 253, 254 and 255 are folded onto table positions 39, 40 and 41;
 * subtypes of 42 and above, and table entries outside
 * 1 .. ASN2GNBK_TOTAL_SOURCE - 1, are ignored.
 */
static void OrgModToQualArray (
  OrgModPtr omp,
  QualValPtr qvp
)

{
  Uint1       code;
  SourceType  slot;

  if (qvp == NULL) return;

  for (; omp != NULL; omp = omp->next) {
    switch (omp->subtype) {
      case 253 :
        code = 39;
        break;
      case 254 :
        code = 40;
        break;
      case 255 :
        code = 41;
        break;
      default :
        code = omp->subtype;
        break;
    }
    if (code >= 42) continue;

    slot = orgModToSourceIdx [code];
    if (slot <= 0 || slot >= ASN2GNBK_TOTAL_SOURCE) continue;

    /* first occurrence wins */
    if (qvp [slot].omp == NULL) {
      qvp [slot].omp = omp;
    }
  }
}
3177
3178 static CharPtr organelleQual [] = {
3179 NULL,
3180 NULL,
3181 "/organelle=\"plastid:chloroplast\"",
3182 "/organelle=\"plastid:chromoplast\"",
3183 "/organelle=\"mitochondrion:kinetoplast\"",
3184 "/organelle=\"mitochondrion\"",
3185 "/organelle=\"plastid\"",
3186 "/macronuclear",
3187 NULL,
3188 "/plasmid=\"\"",
3189 "/transposon=\"\"",
3190 "/insertion_seq=\"\"",
3191 "/organelle=\"plastid:cyanelle\"",
3192 "/proviral",
3193 NULL,
3194 "/organelle=\"nucleomorph\"",
3195 "/organelle=\"plastid:apicoplast\"",
3196 "/organelle=\"plastid:leucoplast\"",
3197 "/organelle=\"plastid:proplastid\"",
3198 NULL,
3199 "/organelle=\"hydrogenosome\"",
3200 NULL,
3201 "/organelle=\"chromatophore\""
3202 };
3203
StringIsJustQuotes(CharPtr str)3204 NLM_EXTERN Boolean StringIsJustQuotes (
3205 CharPtr str
3206 )
3207
3208 {
3209 Nlm_Uchar ch; /* to use 8bit characters in multibyte languages */
3210
3211 if (str != NULL) {
3212 ch = *str;
3213 while (ch != '\0') {
3214 if (ch > ' ' && ch != '"' && ch != '\'') {
3215 return FALSE;
3216 }
3217 str++;
3218 ch = *str;
3219 }
3220 }
3221 return TRUE;
3222 }
3223
/*
 * RemoveAllSpaces
 *
 * Deletes every ' ' character from str in place and returns str.
 * Returns NULL, without touching anything, when str is NULL or empty.
 */
static CharPtr RemoveAllSpaces (
  CharPtr str
)

{
  CharPtr  from;
  CharPtr  to;

  if (str == NULL || *str == '\0') return NULL;

  /* to never runs ahead of from, so the copy is safe in place */

  to = str;
  for (from = str; *from != '\0'; from++) {
    if (*from == ' ') continue;
    *to = *from;
    to++;
  }
  *to = '\0';

  return str;
}
3250
AddFeatureToGbseq(GBSeqPtr gbseq,GBFeaturePtr gbfeat,CharPtr str,SeqFeatPtr sfp)3251 NLM_EXTERN void AddFeatureToGbseq (
3252 GBSeqPtr gbseq,
3253 GBFeaturePtr gbfeat,
3254 CharPtr str,
3255 SeqFeatPtr sfp
3256 )
3257
3258 {
3259 Char ch;
3260 CharPtr copy;
3261 GBQualifierPtr gbqual;
3262 GBQualifierPtr last = NULL;
3263 CharPtr ptr;
3264 CharPtr qual;
3265 CharPtr tmp;
3266 CharPtr val;
3267
3268 if (gbseq == NULL || gbfeat == NULL || StringHasNoText (str)) return;
3269
3270 copy = StringSave (str);
3271
3272 /* link in reverse order, to be reversed in slash block */
3273
3274 gbfeat->next = gbseq->feature_table;
3275 gbseq->feature_table = gbfeat;
3276
3277 /* now parse qualifiers */
3278
3279 ptr = StringStr (copy, " /");
3280 while (ptr != NULL) {
3281 qual = ptr + 22;
3282 val = qual;
3283 ch = *val;
3284 while (ch != '=' && ch != '\n' && ch != '\0') {
3285 val++;
3286 ch = *val;
3287 }
3288 /*
3289 val = StringChr (qual, '=');
3290 if (val == NULL) {
3291 val = StringChr (qual, '\n');
3292 }
3293 */
3294 if (ch != '\0' /* val != NULL */) {
3295 *val = '\0';
3296 val++;
3297 if (ch == '=') {
3298 tmp = val;
3299 if (*val == '"') {
3300 val++;
3301 tmp = val;
3302 ch = *tmp;
3303 while (ch != '"' && ch != '\0') {
3304 tmp++;
3305 ch = *tmp;
3306 }
3307 }
3308 ptr = StringStr (tmp, "\n /");
3309 if (ptr != NULL) {
3310 *ptr = '\0';
3311 ptr++;
3312 }
3313 } else {
3314 ptr = StringStr (val, " /");
3315 val = NULL;
3316 }
3317 gbqual = GBQualifierNew ();
3318 if (gbqual != NULL) {
3319 gbqual->name = StringSave (qual);
3320 if (! StringHasNoText (val)) {
3321 gbqual->value = StringSave (val);
3322 CleanQualValue (gbqual->value);
3323 Asn2gnbkCompressSpaces (gbqual->value);
3324 if (sfp != NULL) {
3325 if (sfp->data.choice == SEQFEAT_CDREGION &&
3326 StringICmp (qual, "translation") == 0) {
3327 RemoveAllSpaces (gbqual->value);
3328 } else if (sfp->data.choice == SEQFEAT_CDREGION &&
3329 StringICmp (qual, "coded_by") == 0) {
3330 RemoveAllSpaces (gbqual->value);
3331 } else if (sfp->data.choice == SEQFEAT_RNA &&
3332 StringICmp (qual, "transcription") == 0) {
3333 RemoveAllSpaces (gbqual->value);
3334 } else if (sfp->data.choice == SEQFEAT_PROT &&
3335 StringICmp (qual, "peptide") == 0) {
3336 RemoveAllSpaces (gbqual->value);
3337 } else if (sfp->data.choice == SEQFEAT_PROT &&
3338 StringICmp (qual, "derived_from") == 0) {
3339 RemoveAllSpaces (gbqual->value);
3340 }
3341 }
3342 }
3343 }
3344 } else {
3345 gbqual = GBQualifierNew ();
3346 if (gbqual != NULL) {
3347 gbqual->name = StringSave (qual);
3348 }
3349 }
3350 if (gbfeat->quals == NULL) {
3351 gbfeat->quals = gbqual;
3352 } else if (last != NULL) {
3353 last->next = gbqual;
3354 }
3355 last = gbqual;
3356 }
3357
3358 MemFree (copy);
3359 }
3360
GetMolTypeQual(BioseqPtr bsp)3361 NLM_EXTERN CharPtr GetMolTypeQual (
3362 BioseqPtr bsp
3363 )
3364
3365 {
3366 SeqMgrDescContext dcontext;
3367 MolInfoPtr mip;
3368 SeqDescrPtr sdp;
3369
3370 if (bsp == NULL) return NULL;
3371
3372 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3373 if (sdp == NULL) return NULL;
3374 mip = (MolInfoPtr) sdp->data.ptrvalue;
3375 if (mip == NULL) return NULL;
3376
3377 switch (mip->biomol) {
3378 case 0 :
3379 switch (bsp->mol) {
3380 case Seq_mol_dna :
3381 return "unassigned DNA";
3382 case Seq_mol_rna :
3383 return "unassigned RNA";
3384 case Seq_mol_na :
3385 break;
3386 default :
3387 break;
3388 }
3389 break;
3390 case MOLECULE_TYPE_GENOMIC :
3391 switch (bsp->mol) {
3392 case Seq_mol_dna :
3393 return "genomic DNA";
3394 case Seq_mol_rna :
3395 return "genomic RNA";
3396 case Seq_mol_na :
3397 break;
3398 default :
3399 break;
3400 }
3401 break;
3402 case MOLECULE_TYPE_PRE_MRNA :
3403 return "transcribed RNA";
3404 case MOLECULE_TYPE_MRNA :
3405 return "mRNA";
3406 case MOLECULE_TYPE_RRNA :
3407 return "rRNA";
3408 case MOLECULE_TYPE_TRNA :
3409 return "tRNA";
3410 case MOLECULE_TYPE_SNRNA :
3411 return "transcribed RNA";
3412 case MOLECULE_TYPE_SCRNA :
3413 return "transcribed RNA";
3414 case MOLECULE_TYPE_PEPTIDE :
3415 break;
3416 case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
3417 switch (bsp->mol) {
3418 case Seq_mol_dna :
3419 return "other DNA";
3420 case Seq_mol_rna :
3421 return "other RNA";
3422 case Seq_mol_na :
3423 break;
3424 default :
3425 break;
3426 }
3427 break;
3428 case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
3429 break;
3430 case MOLECULE_TYPE_CRNA :
3431 return "viral cRNA";
3432 break;
3433 case MOLECULE_TYPE_SNORNA :
3434 return "transcribed RNA";
3435 break;
3436 case MOLECULE_TYPE_TRANSCRIBED_RNA :
3437 return "transcribed RNA";
3438 break;
3439 case MOLECULE_TYPE_NCRNA :
3440 return "transcribed RNA";
3441 break;
3442 case MOLECULE_TYPE_TMRNA :
3443 return "transcribed RNA";
3444 break;
3445 case 255 :
3446 switch (bsp->mol) {
3447 case Seq_mol_dna :
3448 return "other DNA";
3449 case Seq_mol_rna :
3450 return "other RNA";
3451 case Seq_mol_na :
3452 break;
3453 default :
3454 break;
3455 }
3456 break;
3457 default :
3458 break;
3459 }
3460
3461 return NULL;
3462 }
3463
ParsePCRPrimerString(QualValPtr qvp)3464 static ValNodePtr ParsePCRPrimerString (
3465 QualValPtr qvp
3466 )
3467
3468 {
3469 CharPtr fwd_primer_seq = NULL;
3470 CharPtr rev_primer_seq = NULL;
3471 CharPtr fwd_primer_name = NULL;
3472 CharPtr rev_primer_name = NULL;
3473 SubSourcePtr ssp;
3474
3475 if (qvp == NULL) return NULL;
3476
3477 ssp = qvp [SCQUAL_fwd_primer_seq].ssp;
3478 if (ssp != NULL) {
3479 fwd_primer_seq = ssp->name;
3480 }
3481 ssp = qvp [SCQUAL_rev_primer_seq].ssp;
3482 if (ssp != NULL) {
3483 rev_primer_seq = ssp->name;
3484 }
3485 ssp = qvp [SCQUAL_fwd_primer_name].ssp;
3486 if (ssp != NULL) {
3487 fwd_primer_name = ssp->name;
3488 }
3489 ssp = qvp [SCQUAL_rev_primer_name].ssp;
3490 if (ssp != NULL) {
3491 rev_primer_name = ssp->name;
3492 }
3493
3494 return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
3495 }
3496
ParseColonString(CharPtr strs,Boolean multiple)3497 static ValNodePtr ParseColonString (
3498 CharPtr strs,
3499 Boolean multiple
3500 )
3501
3502 {
3503 ValNodePtr head = NULL;
3504 size_t len;
3505 CharPtr ptr, str, tmp;
3506
3507 if (StringHasNoText (strs)) return NULL;
3508
3509 tmp = StringSave (strs);
3510 str = tmp;
3511 len = StringLen (str);
3512 if (len > 1 && StringChr (str, ':') != NULL /* && multiple */) {
3513 while (StringDoesHaveText (str)) {
3514 ptr = StringChr (str, ':');
3515 if (ptr != NULL) {
3516 *ptr = '\0';
3517 ptr++;
3518 }
3519 TrimSpacesAroundString (str);
3520 ValNodeCopyStr (&head, 0, str);
3521 str = ptr;
3522 }
3523 } else {
3524 ValNodeCopyStr (&head, 0, str);
3525 }
3526
3527 MemFree (tmp);
3528 return head;
3529 }
3530
/*
 * PrintHalfPrimer
 *
 * Appends "label value" pieces for one primer direction to *headp.  name
 * and seq are each split with ParseColonString.  When there are
 * sequences, each is printed preceded by the name at the same position,
 * if there is one; names beyond the last sequence are dropped.  When
 * there are no sequences, the names alone are printed only if
 * name_only_ok is set.  prefix is written before the first piece; every
 * later piece is preceded by ", ".  Blank pieces are skipped.
 */
static void PrintHalfPrimer (
  ValNodePtr PNTR headp,
  CharPtr name,
  CharPtr seq,
  CharPtr nm_label,
  CharPtr sq_label,
  CharPtr prefix,
  Boolean name_only_ok,
  Boolean multiple
)

{
  ValNodePtr  names;
  ValNodePtr  nm;
  ValNodePtr  seqs;
  ValNodePtr  sq;
  CharPtr     txt;

  names = ParseColonString (name, multiple);
  seqs = ParseColonString (seq, multiple);

  if (seqs != NULL) {

    /* walk sequences, consuming names in parallel while they last */

    nm = names;
    for (sq = seqs; sq != NULL; sq = sq->next) {
      if (nm != NULL) {
        txt = (CharPtr) nm->data.ptrvalue;
        if (StringDoesHaveText (txt)) {
          ValNodeCopyStr (headp, 0, prefix);
          ValNodeCopyStr (headp, 0, nm_label);
          ValNodeCopyStr (headp, 0, txt);
          prefix = ", ";
        }
        nm = nm->next;
      }
      txt = (CharPtr) sq->data.ptrvalue;
      if (StringDoesHaveText (txt)) {
        ValNodeCopyStr (headp, 0, prefix);
        ValNodeCopyStr (headp, 0, sq_label);
        ValNodeCopyStr (headp, 0, txt);
        prefix = ", ";
      }
    }

  } else if (name_only_ok) {

    for (nm = names; nm != NULL; nm = nm->next) {
      txt = (CharPtr) nm->data.ptrvalue;
      if (StringDoesHaveText (txt)) {
        ValNodeCopyStr (headp, 0, prefix);
        ValNodeCopyStr (headp, 0, nm_label);
        ValNodeCopyStr (headp, 0, txt);
        prefix = ", ";
      }
    }
  }

  ValNodeFreeData (names);
  ValNodeFreeData (seqs);
}
3588
NextPCRPrimerString(PcrSetPtr psp,Boolean isInNote,Boolean multiple)3589 static CharPtr NextPCRPrimerString (
3590 PcrSetPtr psp,
3591 Boolean isInNote,
3592 Boolean multiple
3593 )
3594
3595 {
3596 ValNodePtr head = NULL, vnp;
3597 CharPtr prefix = NULL;
3598 CharPtr str;
3599
3600 if (psp == NULL) return NULL;
3601
3602 if (StringHasNoText (psp->fwd_seq) || StringHasNoText (psp->rev_seq)) {
3603 if (isInNote) {
3604 /*
3605 if (StringDoesHaveText (psp->fwd_name)) {
3606 ValNodeCopyStr (&head, 0, prefix);
3607 ValNodeCopyStr (&head, 0, "fwd_name: ");
3608 ValNodeCopyStr (&head, 0, psp->fwd_name);
3609 prefix = ", ";
3610 }
3611
3612 if (StringDoesHaveText (psp->fwd_seq)) {
3613 ValNodeCopyStr (&head, 0, prefix);
3614 ValNodeCopyStr (&head, 0, "fwd_seq: ");
3615 ValNodeCopyStr (&head, 0, psp->fwd_seq);
3616 prefix = ", ";
3617 }
3618
3619 if (StringDoesHaveText (psp->rev_name)) {
3620 ValNodeCopyStr (&head, 0, prefix);
3621 ValNodeCopyStr (&head, 0, "rev_name: ");
3622 ValNodeCopyStr (&head, 0, psp->rev_name);
3623 prefix = ", ";
3624 }
3625
3626 if (StringDoesHaveText (psp->rev_seq)) {
3627 ValNodeCopyStr (&head, 0, prefix);
3628 ValNodeCopyStr (&head, 0, "rev_seq: ");
3629 ValNodeCopyStr (&head, 0, psp->rev_seq);
3630 prefix = ", ";
3631 }
3632 */
3633 PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3634 if (head != NULL) {
3635 prefix = ", ";
3636 }
3637 PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3638 } else {
3639 return StringSave ("");
3640 }
3641 } else {
3642 if (isInNote) return StringSave ("");
3643
3644 PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3645 PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3646 }
3647
3648 if (head != NULL && isInNote) {
3649 vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3650 if (vnp != NULL) {
3651 vnp->next = head;
3652 head = vnp;
3653 }
3654 }
3655
3656 str = MergeFFValNodeStrs (head);
3657 ValNodeFreeData (head);
3658 return str;
3659 }
3660
/*
 * PrintHalfReaction
 *
 * Appends the text for one chain of PCR primers (forward or reverse) to the
 * string list at headp.  For each primer:
 *
 *   - the name (if it has text) is written when the primer has a sequence,
 *     or when name_only_ok is set;
 *   - the sequence is written whenever it has text.
 *
 * Every written item is preceded by the current prefix (which may be NULL for
 * the first item) and its label; after the first item the prefix is ", ".
 * The multiple argument is accepted but not used here.
 */
static void PrintHalfReaction (
  ValNodePtr PNTR headp,
  PCRPrimerPtr primers,
  CharPtr nm_label,
  CharPtr sq_label,
  CharPtr prefix,
  Boolean name_only_ok,
  Boolean multiple
)

{
  Boolean       has_name, has_seq;
  PCRPrimerPtr  primer;

  primer = primers;
  while (primer != NULL) {
    has_seq = (Boolean) (StringDoesHaveText (primer->seq) ? TRUE : FALSE);
    has_name = (Boolean) (StringDoesHaveText (primer->name) ? TRUE : FALSE);

    if (has_name && (has_seq || name_only_ok)) {
      ValNodeCopyStr (headp, 0, prefix);
      ValNodeCopyStr (headp, 0, nm_label);
      ValNodeCopyStr (headp, 0, primer->name);
      prefix = ", ";
    }
    if (has_seq) {
      ValNodeCopyStr (headp, 0, prefix);
      ValNodeCopyStr (headp, 0, sq_label);
      ValNodeCopyStr (headp, 0, primer->seq);
      prefix = ", ";
    }

    primer = primer->next;
  }
}
3696
NextPCRReaction(PCRReactionPtr prp,Boolean isInNote,Boolean multiple)3697 static CharPtr NextPCRReaction (
3698 PCRReactionPtr prp,
3699 Boolean isInNote,
3700 Boolean multiple
3701 )
3702
3703 {
3704 Boolean has_fwd_seq = FALSE, has_rev_seq = FALSE;
3705 ValNodePtr head = NULL, vnp;
3706 PCRPrimerPtr ppp;
3707 CharPtr prefix = NULL, str;
3708
3709 if (prp == NULL) return NULL;
3710
3711 for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
3712 if (StringDoesHaveText (ppp->seq)) {
3713 has_fwd_seq = TRUE;
3714 }
3715 }
3716
3717 for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
3718 if (StringDoesHaveText (ppp->seq)) {
3719 has_rev_seq = TRUE;
3720 }
3721 }
3722
3723 if (has_fwd_seq && has_rev_seq) {
3724 if (isInNote) {
3725 return StringSave ("");
3726 } else {
3727 PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3728 PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3729 }
3730 } else {
3731 if (isInNote) {
3732 PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3733 if (head != NULL) {
3734 prefix = ", ";
3735 }
3736 PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3737 } else {
3738 return StringSave ("");
3739 }
3740 }
3741
3742 if (head != NULL && isInNote) {
3743 vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3744 if (vnp != NULL) {
3745 vnp->next = head;
3746 head = vnp;
3747 }
3748 }
3749
3750 str = MergeFFValNodeStrs (head);
3751 ValNodeFreeData (head);
3752 return str;
3753 }
3754
3755 /* specimen_voucher, culture_collection, bio_material default institution mouseover */
3756
3757 typedef struct instcodedata {
3758 CharPtr code;
3759 CharPtr name;
3760 } IcCodeData, PNTR IcCodePtr;
3761
3762 static ValNodePtr ic_code_list = NULL;
3763 static IcCodePtr PNTR ic_code_data = NULL;
3764 static Int4 ic_code_len = 0;
3765 static Boolean ic_code_loaded = FALSE;
3766
SortVnpByInstCode(VoidPtr ptr1,VoidPtr ptr2)3767 static int LIBCALLBACK SortVnpByInstCode (VoidPtr ptr1, VoidPtr ptr2)
3768
3769 {
3770 int compare;
3771 IcCodePtr irp1, irp2;
3772 CharPtr str1, str2;
3773 ValNodePtr vnp1, vnp2;
3774
3775 if (ptr1 == NULL || ptr2 == NULL) return 0;
3776 vnp1 = *((ValNodePtr PNTR) ptr1);
3777 vnp2 = *((ValNodePtr PNTR) ptr2);
3778 if (vnp1 == NULL || vnp2 == NULL) return 0;
3779 irp1 = (IcCodePtr) vnp1->data.ptrvalue;
3780 irp2 = (IcCodePtr) vnp2->data.ptrvalue;
3781 if (irp1 == NULL || irp2 == NULL) return 0;
3782 str1 = irp1->code;
3783 str2 = irp2->code;
3784 if (str1 == NULL || str2 == NULL) return 0;
3785 compare = StringCmp (str1, str2);
3786 if (compare > 0) {
3787 return 1;
3788 } else if (compare < 0) {
3789 return -1;
3790 }
3791 str1 = irp1->name;
3792 str2 = irp2->name;
3793 if (str1 == NULL || str2 == NULL) return 0;
3794 compare = StringCmp (str1, str2);
3795 if (compare > 0) {
3796 return 1;
3797 } else if (compare < 0) {
3798 return -1;
3799 }
3800 return 0;
3801 }
3802
SetupInstCodeNameTable(void)3803 static void SetupInstCodeNameTable (void)
3804
3805 {
3806 FileCache fc;
3807 CharPtr file = "institution_codes.txt";
3808 FILE *fp = NULL;
3809 Int4 i;
3810 IcCodePtr irp;
3811 ValNodePtr last = NULL;
3812 Char line [512];
3813 Char path [PATH_MAX];
3814 CharPtr ptr;
3815 ErrSev sev;
3816 CharPtr str;
3817 ValNodePtr vnp;
3818
3819 if (ic_code_loaded) return;
3820 if (ic_code_data != NULL) return;
3821
3822 if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
3823 FileBuildPath (path, NULL, file);
3824 sev = ErrSetMessageLevel (SEV_ERROR);
3825 fp = FileOpen (path, "r");
3826 ErrSetMessageLevel (sev);
3827 if (fp != NULL) {
3828 FileCacheSetup (&fc, fp);
3829
3830 str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
3831 while (str != NULL) {
3832 if (StringDoesHaveText (str)) {
3833 ptr = StringChr (str, '\t');
3834 if (ptr != NULL) {
3835 *ptr = '\0';
3836 ptr++;
3837 ptr = StringChr (ptr, '\t');
3838 if (ptr != NULL) {
3839 *ptr = '\0';
3840 ptr++;
3841 irp = (IcCodePtr) MemNew (sizeof (IcCodeData));
3842 if (irp != NULL) {
3843 TrimSpacesAroundString (str);
3844 TrimSpacesAroundString (ptr);
3845 irp->code = StringSave (str);
3846 irp->name = StringSave (ptr);
3847 vnp = ValNodeAddPointer (&last, 0, (Pointer) irp);
3848 if (ic_code_list == NULL) {
3849 ic_code_list = vnp;
3850 }
3851 last = vnp;
3852 }
3853 }
3854 }
3855 }
3856 str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
3857 }
3858
3859 FileClose (fp);
3860 ic_code_len = ValNodeLen (ic_code_list);
3861 if (ic_code_len > 0) {
3862 ic_code_list = ValNodeSort (ic_code_list, SortVnpByInstCode);
3863 ic_code_data = (IcCodePtr PNTR) MemNew (sizeof (IcCodePtr) * (ic_code_len + 1));
3864 if (ic_code_data != NULL) {
3865 for (vnp = ic_code_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
3866 irp = (IcCodePtr) vnp->data.ptrvalue;
3867 ic_code_data [i] = irp;
3868 }
3869 }
3870 }
3871 }
3872 }
3873
3874 ic_code_loaded = TRUE;
3875 }
3876
/*
 * FullNameFromInstCode
 *
 * Returns the name stored for an institution code, or NULL when the code has
 * no text, the table could not be loaded, or the code is not present.  The
 * table is loaded on first use through SetupInstCodeNameTable.  The returned
 * pointer is the table's own string, not a copy.
 *
 * Lookup is a lower-bound binary search over ic_code_data using StringCmp on
 * the code field.
 */
static CharPtr FullNameFromInstCode (CharPtr code)

{
  IcCodePtr  entry;
  Int4       hi, lo, probe;

  if (StringHasNoText (code)) return NULL;

  if (ic_code_data == NULL) {
    SetupInstCodeNameTable ();
    if (ic_code_data == NULL) return NULL;
  }

  lo = 0;
  hi = ic_code_len - 1;
  while (lo < hi) {
    probe = (lo + hi) / 2;
    entry = ic_code_data [(int) probe];
    if (entry == NULL || StringCmp (entry->code, code) >= 0) {
      hi = probe;
    } else {
      lo = probe + 1;
    }
  }

  /* hi is now the first slot whose code is not less than the query */

  entry = ic_code_data [(int) hi];
  if (entry == NULL) return NULL;
  if (StringCmp (entry->code, code) != 0) return NULL;

  return entry->name;
}
3909
3910 /* specimen_voucher, culture_collection, bio_material hyperlinks */
3911
/* base URLs, written first in the href built for a voucher link */

#define s_acbr_base   "http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr="
#define s_atcc_base   "http://www.atcc.org/Products/All/"
#define s_bcrc_base   "https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no="
#define s_cas_base    "http://collections.calacademy.org/herp/specimen/"
#define s_cbs_base    "http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+"
#define s_ccap_base   "http://www.ccap.ac.uk/strain_info.php?Strain_No="
#define s_ccmp_base   "https://ccmp.bigelow.org/node/1/strain/CCMP"
#define s_ccug_base   "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
#define s_cfmr_base   "http://www.fpl.fs.fed.us/search/mycologysearch_action.php?sorting_rule=1u&phrasesAndKeywords02="
#define s_cori_base   "http://ccr.coriell.org/Sections/Search/Search.aspx?q="
#define s_dsm_base    "https://www.dsmz.de/catalogues/details/culture/DSM-"
#define s_dsmz_base   "https://www.dsmz.de/catalogues/details/culture/PV-"
#define s_frr_base    "http://www.foodscience.csiro.au/cgi-bin/rilax/search.pl?stpos=0&stype=AND&query="
#define s_fsu_base    "http://www.prz.uni-jena.de/data.php?fsu="
#define s_jcm_base    "http://www.jcm.riken.jp/cgi-bin/jcm/jcm_number?JCM="
#define s_kctc_base   "http://kctc.kribb.re.kr/English/_SearchView.aspx?sn="
#define s_ku_base     "https://ichthyology.specify.ku.edu/specify/bycatalog/"
#define s_lcr_base    "http://scd.landcareresearch.co.nz/Specimen/"
#define s_maff_base   "http://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff="
#define s_mcz_base    "http://mczbase.mcz.harvard.edu/guid/"
#define s_mtcc_base   "http://mtcc.imtech.res.in/catalogue_hyper.php?a="
#define s_mucl_base   "http://bccm.belspo.be/db/mucl_search_results.php?FIRSTITEM=1&LIST1=STRAIN_NUMBER&TEXT1="
#define s_nbrc_base   "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="
#define s_ncimb_base  "http://www.ncimb.com/BioloMICS.aspx?Table=NCIMBstrains&ExactMatch=T&Fields=All&Name=NCIMB%20"
#define s_nctc_base   "https://www.phe-culturecollections.org.uk/products/bacteria/detail.jsp?collection=nctc&refId=NCTC+"
/* s_nrrl_base carries the same URL as s_nrrl_prok */
#define s_nrrl_base   "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
#define s_nrrl_mold   "http://nrrl.ncaur.usda.gov/cgi-bin/usda/mold/report.html?nrrlcodes="
#define s_nrrl_prok   "http://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html?nrrlcodes="
#define s_nrrl_yest   "http://nrrl.ncaur.usda.gov/cgi-bin/usda/yeast/report.html?nrrlcodes="
#define s_pcc_base    "http://www.crbip.pasteur.fr/fiches/fichecata.jsp?crbip=PCC+"
#define s_pcmb_base   "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
#define s_pycc_base   "http://pycc.bio-aware.com/BioloMICS.aspx?Table=PYCC%20strains&Name=PYCC%20"
#define s_sag_base    "http://sagdb.uni-goettingen.de/detailedList.php?str_number="
#define s_tgrc_base   "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base    "http://arctos.database.museum/guid/"
#define s_uamh_base   "https://secure.devonian.ualberta.ca/uamh/details.php?id="
#define s_usnm_base   "http://collections.mnh.si.edu/services/resolver/resolver.php?"
#define s_ypm_base    "http://collections.peabody.yale.edu/search/Record/"

/* generic separators used as prefix values */

#define s_colon_pfx   ":"
#define s_uscr_pfx    "_"

/* collection-specific prefixes, written before the (padded) identifier */

#define s_kui_pfx     "KUI/"
#define s_kuit_pfx    "KUIT/"
#define s_psu_pfx     "PSU:Mamm:"
#define s_usnm_pfx    "voucher=Birds:"

#define s_ypment_pfx  "YPM-ENT-"
#define s_ypmher_pfx  "YPM-HER-"
#define s_ypmich_pfx  "YPM-ICH-"
#define s_ypmiz_pfx   "YPM-IZ-"
#define s_ypmmam_pfx  "YPM-MAM-"
#define s_ypmorn_pfx  "YPM-ORN-"

/* suffixes, written after the identifier */

#define s_acbr_sfx    "&Fields=All&ExactMatch=T"
#define s_atcc_sfx    ".aspx"
#define s_bcrc_sfx    "&type_id=9&keyword="
#define s_ku_sfx      "/"
#define s_mucl_sfx    "&LIST2=ALL+FIELDS&CONJ=OR&RANGE=20&B3=Run+Query"
#define s_pycc_sfx    "&Fields=All&ExactMatch=T"
3972
3973 typedef struct vouch {
3974 CharPtr sites;
3975 CharPtr links;
3976 Boolean prepend_institute;
3977 Int2 pad_to;
3978 CharPtr pad_with;
3979 CharPtr prefix;
3980 CharPtr suffix;
3981 } VouchData, PNTR VouchDataPtr;
3982
3983 static VouchData Nlm_spec_vouchers [] = {
3984 { "ACBR", s_acbr_base, FALSE, 0, "", NULL, s_acbr_sfx },
3985 { "ATCC", s_atcc_base, FALSE, 0, "", NULL, s_atcc_sfx },
3986 { "BCRC", s_bcrc_base, FALSE, 0, "", NULL, s_bcrc_sfx },
3987 { "CAS:HERP", s_cas_base, TRUE, 0, "", s_colon_pfx, NULL },
3988 { "CBS", s_cbs_base, FALSE, 0, "", NULL, NULL },
3989 { "CCAP", s_ccap_base, FALSE, 0, "", NULL, NULL },
3990 { "CCMP", s_ccmp_base, FALSE, 0, "", NULL, NULL },
3991 { "CCUG", s_ccug_base, FALSE, 0, "", NULL, NULL },
3992 { "CFMR", s_cfmr_base, FALSE, 0, "", NULL, NULL },
3993 { "CHR", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
3994 { "Coriell", s_cori_base, FALSE, 0, "", NULL, NULL },
3995 { "CRCM:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
3996 { "DGR:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
3997 { "DGR:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
3998 { "DGR:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
3999 { "DGR:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4000 { "DGR:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4001 { "DMNS:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4002 { "DMNS:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4003 { "DSM", s_dsm_base, FALSE, 0, "", NULL, NULL },
4004 { "DSMZ", s_dsmz_base, FALSE, 0, "", NULL, NULL },
4005 { "FRR", s_frr_base, FALSE, 0, "", NULL, NULL },
4006 { "FSU<DEU>", s_fsu_base, FALSE, 0, "", NULL, NULL },
4007 { "ICMP", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
4008 { "JCM", s_jcm_base, FALSE, 0, "", NULL, NULL },
4009 { "KCTC", s_kctc_base, FALSE, 0, "", NULL, NULL },
4010 { "KNWR:Ento", s_uam_base , TRUE, 0, "", s_colon_pfx, NULL },
4011 { "KU:I", s_ku_base, FALSE, 0, "", s_kui_pfx, s_ku_sfx },
4012 { "KU:IT", s_ku_base, FALSE, 0, "", s_kuit_pfx, s_ku_sfx },
4013 { "KWP:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4014 { "MAFF", s_maff_base, FALSE, 0, "", NULL, NULL },
4015 { "MCZ:Bird", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4016 { "MCZ:Cryo", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4017 { "MCZ:Ent", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4018 { "MCZ:Fish", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4019 { "MCZ:Herp", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4020 { "MCZ:Ich", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4021 { "MCZ:IP", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4022 { "MCZ:IZ", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4023 { "MCZ:Mala", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4024 { "MCZ:Mamm", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4025 { "MCZ:Orn", s_mcz_base, TRUE, 0, "", s_colon_pfx, NULL },
4026 { "MLZ:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4027 { "MLZ:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4028 { "MSB:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4029 { "MSB:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4030 { "MSB:Para", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4031 { "MTCC", s_mtcc_base, FALSE, 0, "", NULL, NULL },
4032 { "MUCL", s_mucl_base, FALSE, 0, "", NULL, s_mucl_sfx },
4033 { "MVZ:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4034 { "MVZ:Egg", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4035 { "MVZ:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4036 { "MVZ:Hild", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4037 { "MVZ:Img", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4038 { "MVZ:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4039 { "MVZ:Page", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4040 { "MVZObs:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4041 { "NBRC", s_nbrc_base, FALSE, 8, "0", NULL, NULL },
4042 { "NBSB:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4043 { "NCIMB", s_ncimb_base, FALSE, 0, "", NULL, NULL },
4044 { "NCTC", s_nctc_base, FALSE, 0, "", NULL, NULL },
4045 { "NRRL", s_nrrl_base, FALSE, 0, "", NULL, NULL },
4046 { "NRRL:MOLD", s_nrrl_mold, FALSE, 0, "", NULL, NULL },
4047 { "NRRL:PROK", s_nrrl_prok, FALSE, 0, "", NULL, NULL },
4048 { "NRRL:YEAST", s_nrrl_yest, FALSE, 0, "", NULL, NULL },
4049 { "NZAC", s_lcr_base, TRUE, 0, "", s_uscr_pfx, NULL },
4050 { "PCC", s_pcc_base, FALSE, 0, "", NULL, NULL },
4051 { "PCMB", s_pcmb_base, FALSE, 0, "", NULL, NULL },
4052 { "PDD", s_lcr_base, TRUE , 0, "", s_uscr_pfx, NULL },
4053 { "PSU<USA-OR>:Mamm", s_uam_base, FALSE, 0, "", s_psu_pfx, NULL },
4054 { "PYCC", s_pycc_base, FALSE, 0, "", NULL, s_pycc_sfx },
4055 { "SAG", s_sag_base, FALSE, 0, "", NULL, NULL },
4056 { "TGRC", s_tgrc_base, FALSE, 0, "", NULL, NULL },
4057 { "UAM:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4058 { "UAM:Bryo", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4059 { "UAM:Crus", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4060 { "UAM:Ento", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4061 { "UAM:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4062 { "UAM:Herb", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4063 { "UAM:Herp", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4064 { "UAM:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4065 { "UAM:Moll", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4066 { "UAM:Paleo", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4067 { "UAMH", s_uamh_base, FALSE, 0, "", NULL, NULL },
4068 { "UAMObs:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4069 { "USNM:Birds", s_usnm_base, FALSE, 0, "", s_usnm_pfx, NULL },
4070 { "WNMU:Bird", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4071 { "WNMU:Fish", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4072 { "WNMU:Mamm", s_uam_base, TRUE, 0, "", s_colon_pfx, NULL },
4073 { "YPM:ENT", s_ypm_base, FALSE, 6, "0", s_ypment_pfx, NULL },
4074 { "YPM:HER", s_ypm_base, FALSE, 6, "0", s_ypmher_pfx, NULL },
4075 { "YPM:ICH", s_ypm_base, FALSE, 6, "0", s_ypmich_pfx, NULL },
4076 { "YPM:IZ", s_ypm_base, FALSE, 6, "0", s_ypmiz_pfx, NULL },
4077 { "YPM:MAM", s_ypm_base, FALSE, 6, "0", s_ypmmam_pfx, NULL },
4078 { "YPM:ORN", s_ypm_base, FALSE, 6, "0", s_ypmorn_pfx, NULL },
4079 { NULL, NULL, FALSE, 0, "", NULL, NULL }
4080 };
4081
VoucherNameIsValid(CharPtr name)4082 static Int2 VoucherNameIsValid (
4083 CharPtr name
4084 )
4085
4086 {
4087 Int2 L, R, mid;
4088 CharPtr ptr;
4089 Char str [256];
4090
4091 if (StringHasNoText (name)) return -1;
4092 StringNCpy_0 (str, name, sizeof (str));
4093 ptr = StringChr (str, ' ');
4094 if (ptr != NULL) {
4095 *ptr = '\0';
4096 }
4097
4098 L = 0;
4099 R = sizeof (Nlm_spec_vouchers) / sizeof (Nlm_spec_vouchers [0]) - 1; /* -1 because now NULL terminated */
4100
4101 while (L < R) {
4102 mid = (L + R) / 2;
4103 if (StringICmp (Nlm_spec_vouchers [mid].sites, str) < 0) {
4104 L = mid + 1;
4105 } else {
4106 R = mid;
4107 }
4108 }
4109
4110 /* switch to case sensitive comparison to restore case strictness */
4111
4112 if (StringICmp (Nlm_spec_vouchers [R].sites, str) == 0) {
4113 return R;
4114 }
4115
4116 return -1;
4117 }
4118
4119 /* works on subname copy that it can change */
4120
ParseSecVoucher(CharPtr subname,CharPtr PNTR inst,CharPtr PNTR id)4121 static Boolean ParseSecVoucher (
4122 CharPtr subname,
4123 CharPtr PNTR inst,
4124 CharPtr PNTR id
4125 )
4126
4127 {
4128 CharPtr ptr;
4129 CharPtr tmp;
4130
4131 if (StringHasNoText (subname)) return FALSE;
4132 if (StringLen (subname) < 5) return FALSE;
4133 TrimSpacesAroundString (subname);
4134
4135 ptr = StringChr (subname, ':');
4136 if (ptr == NULL) return FALSE;
4137
4138 *inst = subname;
4139
4140 tmp = StringChr (ptr + 1, ':');
4141 if (tmp != NULL) {
4142 *tmp = '\0';
4143 tmp++;
4144 TrimSpacesAroundString (tmp);
4145 *id = tmp;
4146 } else {
4147 *ptr = '\0';
4148 ptr++;
4149 TrimSpacesAroundString (ptr);
4150 *id = ptr;
4151 }
4152
4153 if (StringHasNoText (*inst) || StringHasNoText (*id)) return FALSE;
4154
4155 return TRUE;
4156 }
4157
/*
 * Do_www_specimen_voucher
 *
 * Writes the HTML for a voucher that has a table entry:
 *
 *   inst:<a href="links [inst] [prefix] [padding] id [suffix]">id</a>
 *
 * where inst is wrapped in an <acronym title="..."> element when
 * FullNameFromInstCode finds a name for it.
 *
 *   ffstring  destination string item
 *   inst      institution text, written as given (the caller in this file
 *             passes it already run through EncodeXml)
 *   id        raw identifier text; it is XML-encoded here before being
 *             written into the href and the link text
 *   vdp       table row supplying the URL parts
 *
 * Does nothing when any argument, or vdp->links, is NULL.
 *
 * NOTE(review): the name lookup uses inst exactly as passed, so an encoded
 * institution string that differs from its raw form will not find a name.
 */
static void Do_www_specimen_voucher (
  StringItemPtr ffstring,
  CharPtr inst,
  CharPtr id,
  VouchDataPtr vdp
)

{
  CharPtr  enc_id, enc_title;
  size_t   len_id, len_pad;
  CharPtr  mouseover = NULL;
  CharPtr  shown_id, shown_title;

  if ( ffstring == NULL || inst == NULL || id == NULL || vdp == NULL || vdp->links == NULL ) return;

  /* the identifier comes from record data, so escape it for HTML; fall back to raw text if encoding fails */
  enc_id = EncodeXml (id);
  shown_id = (enc_id != NULL) ? enc_id : id;

  mouseover = FullNameFromInstCode (inst);
  if (mouseover != NULL) {
    /* the name goes inside a quoted attribute, so escape it as well */
    enc_title = EncodeXml (mouseover);
    shown_title = (enc_title != NULL) ? enc_title : mouseover;
    FFAddOneString (ffstring, "<acronym title=\"", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, shown_title, FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString(ffstring, "\" class=\"voucher\">", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, "</acronym>", FALSE, FALSE, TILDE_IGNORE);
    MemFree (enc_title);
  } else {
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, vdp->links, FALSE, FALSE, TILDE_IGNORE);
  if (vdp->prepend_institute) {
    FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
  }
  if (vdp->prefix != NULL) {
    FFAddOneString (ffstring, vdp->prefix, FALSE, FALSE, TILDE_IGNORE);
  }
  if (vdp->pad_to > 0) {
    /* padding is measured against the raw identifier length */
    len_id = StringLen (id);
    len_pad = StringLen (vdp->pad_with);
    /* an empty pad string would never advance len_id, so skip padding entirely */
    if (len_pad > 0) {
      while (len_id < (size_t) vdp->pad_to) {
        FFAddOneString (ffstring, vdp->pad_with, FALSE, FALSE, TILDE_IGNORE);
        len_id += len_pad;
      }
    }
  }
  FFAddOneString (ffstring, shown_id, FALSE, FALSE, TILDE_IGNORE);
  if (vdp->suffix != NULL) {
    FFAddOneString (ffstring, vdp->suffix, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, shown_id, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);

  MemFree (enc_id);
}
4206
FF_www_specimen_voucher(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr subname)4207 NLM_EXTERN void FF_www_specimen_voucher (
4208 IntAsn2gbJobPtr ajp,
4209 StringItemPtr ffstring,
4210 CharPtr subname
4211 )
4212
4213 {
4214 Char buf [512];
4215 CharPtr inst = NULL, id = NULL, mouseover = NULL, encoded;
4216 Int2 R;
4217 VouchDataPtr vdp;
4218
4219 if ( ffstring == NULL || subname == NULL ) return;
4220 if (! GetWWW (ajp)) { /* not in www mode */
4221 FFAddTextToString(ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4222 return;
4223 }
4224 StringNCpy_0 (buf, subname, sizeof (buf));
4225 if (! ParseSecVoucher (buf, &inst, &id)) {
4226 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4227 return;
4228 }
4229 R = VoucherNameIsValid (inst);
4230 if (R < 0) {
4231 mouseover = FullNameFromInstCode (inst);
4232 if (mouseover != NULL) {
4233 FFAddOneString (ffstring, "<acronym title=\"", FALSE, FALSE, TILDE_IGNORE);
4234 FFAddOneString (ffstring, mouseover, FALSE, FALSE, TILDE_IGNORE);
4235 FFAddOneString(ffstring, "\" class=\"voucher\">", FALSE, FALSE, TILDE_IGNORE);
4236 encoded = EncodeXml (inst);
4237 FFAddOneString (ffstring, encoded, FALSE, FALSE, TILDE_IGNORE);
4238 MemFree (encoded);
4239 FFAddOneString (ffstring, "</acronym>", FALSE, FALSE, TILDE_IGNORE);
4240 FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
4241 FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
4242 } else {
4243 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4244 }
4245 return;
4246 }
4247 vdp = &(Nlm_spec_vouchers [R]);
4248 if (vdp == NULL || vdp->links == NULL) {
4249 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
4250 return;
4251 }
4252 encoded = EncodeXml (inst);
4253 Do_www_specimen_voucher (ffstring, encoded, id, vdp);
4254 MemFree (encoded);
4255 }
4256
/*
 * Do_www_lat_lon
 *
 * Writes lat_lon to ffstring as a hyperlink to a Google Maps place URL.
 *
 * A copy of the value (at most sizeof (copy) - 1 characters) is split on
 * runs of spaces into up to six tokens.  Token 0 and token 2 are used as the
 * two coordinates; a token 1 starting with 'S' or a token 3 starting with
 * 'W' puts a "-" in front of the corresponding coordinate.  A missing
 * coordinate token is written as "?".  The link text is the original,
 * unsplit lat_lon string.
 *
 * Does nothing when ffstring or lat_lon is NULL.
 */
static void Do_www_lat_lon (
  StringItemPtr ffstring,
  CharPtr lat_lon
)

{
  Char     copy [128];
  Int2     count;
  CharPtr  cursor;
  CharPtr  lat_sign = "";
  CharPtr  lon_sign = "";
  Char     query [128];
  CharPtr  tokens [6];

  if ( ffstring == NULL || lat_lon == NULL ) return;

  MemSet ((Pointer) tokens, 0, sizeof (tokens));
  StringNCpy_0 (copy, lat_lon, sizeof (copy));

  /* split in place: each run of spaces ends one token and starts the next */

  count = 0;
  cursor = copy;
  tokens [0] = cursor;
  while (*cursor != '\0' && count < 5) {
    if (*cursor != ' ') {
      cursor++;
      continue;
    }
    *cursor = '\0';
    cursor++;
    while (*cursor == ' ') {
      cursor++;
    }
    count++;
    tokens [count] = cursor;
  }

  /* hemisphere letters decide the signs */

  if (tokens [1] != NULL && tokens [1] [0] == 'S') {
    lat_sign = "-";
  }
  if (tokens [3] != NULL && tokens [3] [0] == 'W') {
    lon_sign = "-";
  }

  if (tokens [0] == NULL) {
    tokens [0] = "?";
  }
  if (tokens [2] == NULL) {
    tokens [2] = "?";
  }

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, "https://www.google.com/maps/place/");
  /* both coordinate tokens come out of the 128-byte copy, so the result fits in query */
  sprintf (query, "%s%s+%s%s", lat_sign, tokens [0], lon_sign, tokens [2]);
  FFAddOneString (ffstring, query, FALSE, FALSE, TILDE_IGNORE);
  FFAddTextToString (ffstring, "\">", lat_lon, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
4326
/*
 * FF_www_lat_lon
 *
 * Writes a lat_lon value to ffstring.  The hyperlinked form from
 * Do_www_lat_lon is used only when all of these hold, checked in order:
 * www mode is on, the value has text, IsCorrectLatLonFormat reports a good
 * format with latitude and longitude in range, and ParseLatLon succeeds.
 * In every other case the value is written as plain text.
 *
 * Does nothing when ffstring or lat_lon is NULL.
 */
static void FF_www_lat_lon (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  CharPtr lat_lon
)

{
  Boolean  format_ok = FALSE, precision_ok = FALSE;
  FloatHi  lat = 0.0, lon = 0.0;
  Boolean  lat_in_range = FALSE, lon_in_range = FALSE;
  Boolean  linkable = FALSE;

  if ( ffstring == NULL || lat_lon == NULL ) return;

  if (GetWWW (ajp) && StringDoesHaveText (lat_lon)) {
    /* precision_ok is filled in by the check but does not affect the decision */
    IsCorrectLatLonFormat (lat_lon, &format_ok, &precision_ok, &lat_in_range, &lon_in_range);
    if (format_ok && lat_in_range && lon_in_range && ParseLatLon (lat_lon, &lat, &lon)) {
      linkable = TRUE;
    }
  }

  if (linkable) {
    Do_www_lat_lon (ffstring, lat_lon);
    return;
  }

  /* not in www mode, or one of the checks failed: default print */
  FFAddTextToString (ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
}
4359
FormatSourceFeatBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)4360 NLM_EXTERN CharPtr FormatSourceFeatBlock (
4361 Asn2gbFormatPtr afp,
4362 BaseBlockPtr bbp
4363 )
4364
4365 {
4366 Boolean add_period;
4367 IntAsn2gbJobPtr ajp;
4368 Asn2gbSectPtr asp;
4369 BioSourcePtr biop = NULL;
4370 BioseqPtr bsp;
4371 BioseqSetPtr bssp;
4372 Char buf [128], pfx [512], sfx [128];
4373 CharPtr common = NULL;
4374 Char currAccVer [SEQID_MAX_LEN];
4375 DbtagPtr dbt;
4376 SeqMgrDescContext dcontext;
4377 SeqMgrFeatContext fcontext;
4378 GBFeaturePtr gbfeat = NULL;
4379 GBSeqPtr gbseq;
4380 Int2 i;
4381 IntAsn2gbSectPtr iasp;
4382 Uint1 idx;
4383 IntSrcBlockPtr isp;
4384 Boolean is_desc = TRUE;
4385 Boolean is_gps = FALSE;
4386 Boolean is_other = FALSE;
4387 Boolean is_est_or_gss = FALSE;
4388 Boolean is_bc;
4389 Boolean is_rf;
4390 Boolean is_sc;
4391 Int2 j;
4392 Uint1 jdx;
4393 CharPtr js = NULL;
4394 Uint1 lastomptype;
4395 Uint1 lastssptype;
4396 SeqLocPtr location = NULL;
4397 MolInfoPtr mip;
4398 CharPtr notestr;
4399 SourceType PNTR notetbl = NULL;
4400 Boolean okay;
4401 ObjectIdPtr oip;
4402 OrgModPtr omp;
4403 OrgNamePtr onp = NULL;
4404 OrgRefPtr orp = NULL;
4405 Boolean partial5;
4406 Boolean partial3;
4407 CharPtr prefix;
4408 PCRReactionPtr prp;
4409 ValNodePtr pset;
4410 PcrSetPtr psp;
4411 SourceType PNTR qualtbl = NULL;
4412 QualValPtr qvp;
4413 SeqDescrPtr sdp = NULL;
4414 SeqEntryPtr sep;
4415 SeqFeatPtr sfp = NULL;
4416 SeqIdPtr sip;
4417 SubSourcePtr ssp;
4418 CharPtr str;
4419 BioseqPtr target;
4420 CharPtr taxname = NULL;
4421 ValNodePtr vnp;
4422 StringItemPtr ffstring, unique;
4423
4424 if (afp == NULL || bbp == NULL) return NULL;
4425 ajp = afp->ajp;
4426 if (ajp == NULL) return NULL;
4427 asp = afp->asp;
4428 if (asp == NULL) return NULL;
4429 target = asp->target;
4430 bsp = asp->bsp;
4431 if (target == NULL || bsp == NULL) return NULL;
4432 qvp = afp->qvp;
4433 if (qvp == NULL) return NULL;
4434
4435 pfx [0] = '\0';
4436 sfx [0] = '\0';
4437
4438 if (ajp->gbseq) {
4439 gbseq = &asp->gbseq;
4440 } else {
4441 gbseq = NULL;
4442 }
4443
4444 /* five-column feature table uses special code for formatting */
4445
4446 if (ajp->format == FTABLE_FMT) {
4447 str = FormatFtableSourceFeatBlock (bbp, target);
4448 return str;
4449 }
4450
4451 /* otherwise do regular flatfile formatting */
4452
4453 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
4454
4455 isp = (IntSrcBlockPtr) bbp;
4456
4457 /* could be descriptor or feature */
4458
4459 if (bbp->itemtype == OBJ_SEQDESC) {
4460 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
4461 if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
4462 biop = (BioSourcePtr) sdp->data.ptrvalue;
4463 }
4464 } else if (bbp->itemtype == OBJ_SEQFEAT) {
4465 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
4466 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
4467 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
4468 }
4469 is_desc = FALSE;
4470 }
4471
4472 if (biop == NULL) return NULL;
4473
4474 unique = FFGetString(ajp);
4475 if ( unique == NULL ) return NULL;
4476
4477 ffstring = FFGetString(ajp);
4478 if ( ffstring == NULL ) return NULL;
4479
4480 FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
4481
4482 /*
4483 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4484 if (sip->choice == SEQID_GI) {
4485 currGi = (BIG_ID) sip->data.intvalue;
4486 }
4487 }
4488 */
4489 currAccVer [0] = '\0';
4490 GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, TRUE);
4491
4492 iasp = (IntAsn2gbSectPtr) asp;
4493
4494 if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE) {
4495 if (iasp->feat_key [FEATDEF_BIOSRC] == NULL) {
4496 iasp->feat_key [FEATDEF_BIOSRC] = StringSave ("source");
4497 }
4498 }
4499
4500 if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
4501 (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
4502 sprintf (pfx, "<span id=\"feature_%s_source_%ld\" class=\"feature\">", currAccVer, (long) isp->source_count);
4503 }
4504
4505 FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
4506 FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
4507
4508 if (gbseq != NULL) {
4509 gbfeat = GBFeatureNew ();
4510 if (gbfeat != NULL) {
4511 gbfeat->key = StringSave ("source");
4512 }
4513 }
4514
4515 location = isp->loc;
4516
4517 str = FFFlatLoc (ajp, bsp, location, ajp->masterStyle, FALSE);
4518
4519 /* if multi-interval join remainders for focus after subtraction, switch to order */
4520 if (sdp != NULL && biop != NULL && biop->is_focus && StringStr (str, "join") != NULL) {
4521 FindReplaceString (&str, "join", "order", FALSE, FALSE);
4522 }
4523
4524 if (iasp != NULL && GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans) {
4525 js = AddJsInterval (iasp, pfx, bsp, FEATDEF_BIOSRC, location, currAccVer);
4526 }
4527 if ( GetWWW(ajp) ) {
4528 FF_www_featloc (ffstring, str);
4529 } else {
4530 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
4531 }
4532 FFAddOneChar(ffstring, '\n', FALSE);
4533
4534 if (gbseq != NULL) {
4535 if (gbfeat != NULL) {
4536 if (StringDoesHaveText (str)) {
4537 gbfeat->location = StringSave (str);
4538 } else {
4539 gbfeat->location = StringSave ("");
4540 }
4541 if (StringDoesHaveText (str)) {
4542 if (StringStr (str, "join") != NULL) {
4543 gbfeat->operator__ = StringSave ("join");
4544 } else if (StringStr (str, "order") != NULL) {
4545 gbfeat->operator__ = StringSave ("order");
4546 }
4547 }
4548 CheckSeqLocForPartial (location, &partial5, &partial3);
4549 gbfeat->partial5 = partial5;
4550 gbfeat->partial3 = partial3;
4551 if (ajp->masterStyle) {
4552 AddIntervalsToGbfeat (gbfeat, location, bsp);
4553 } else {
4554 AddIntervalsToGbfeat (gbfeat, location, NULL);
4555 }
4556 }
4557 }
4558
4559 MemFree (str);
4560
4561 orp = biop->org;
4562 if (orp != NULL) {
4563 taxname = orp->taxname;
4564 /* common = orp->common; */
4565 }
4566 if (StringHasNoText (taxname)) {
4567 if (ajp->flags.needOrganismQual) {
4568 taxname = "unknown";
4569 if (orp != NULL) {
4570 common = orp->common;
4571 }
4572 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
4573 } else {
4574 taxname = "unknown";
4575 common = orp->common;
4576 #endif
4577 }
4578 }
4579
4580 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
4581 if (sep != NULL && IS_Bioseq_set (sep)) {
4582 bssp = (BioseqSetPtr) sep->data.ptrvalue;
4583 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
4584 is_gps = TRUE;
4585 }
4586 }
4587
4588 if (bsp != NULL) {
4589 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4590 if (sip->choice == SEQID_OTHER) {
4591 is_other = TRUE;
4592 }
4593 }
4594 }
4595
4596 if (ajp->refseqConventions) {
4597 is_other = TRUE;
4598 }
4599
4600 /* populate qualifier table from biosource fields */
4601
4602 qvp [SCQUAL_organism].str = taxname;
4603 qvp [SCQUAL_common_name].str = common;
4604
4605 if (biop->is_focus) {
4606 qvp [SCQUAL_focus].ble = TRUE;
4607 }
4608
4609 str = GetMolTypeQual (bsp);
4610 /*
4611 if (StringICmp (str, "ncRNA") == 0) {
4612 str = "other RNA";
4613 }
4614 */
4615 if (str == NULL) {
4616 switch (bsp->mol) {
4617 case Seq_mol_dna :
4618 str = "unassigned DNA";
4619 break;
4620 case Seq_mol_rna :
4621 str = "unassigned RNA";
4622 break;
4623 case Seq_mol_aa :
4624 break;
4625 default :
4626 str = "unassigned DNA";
4627 break;
4628 }
4629 }
4630 qvp [SCQUAL_mol_type].str = str;
4631
4632 SubSourceToQualArray (biop->subtype, qvp);
4633
4634 if (orp != NULL) {
4635 onp = orp->orgname;
4636 if (onp != NULL) {
4637 OrgModToQualArray (onp->mod, qvp);
4638 }
4639
4640 if (! is_desc) {
4641 qvp [SCQUAL_unstructured].vnp = orp->mod;
4642 }
4643 qvp [SCQUAL_db_xref].vnp = orp->db;
4644 }
4645
4646 if (sfp != NULL) {
4647 qvp [SCQUAL_org_xref].vnp = sfp->dbxref;
4648 }
4649
4650 /* organelle currently prints /mitochondrion, /virion, etc. */
4651
4652 qvp [SCQUAL_organelle].num = biop->genome;
4653
4654 /* some qualifiers are flags in genome and names in subsource, print once with name */
4655
4656 if (qvp [SCQUAL_ins_seq_name].ssp != NULL &&
4657 qvp [SCQUAL_organelle].num == GENOME_insertion_seq) {
4658 qvp [SCQUAL_organelle].num = 0;
4659 }
4660 if (qvp [SCQUAL_plasmid_name].ssp != NULL &&
4661 qvp [SCQUAL_organelle].num == GENOME_plasmid) {
4662 qvp [SCQUAL_organelle].num = 0;
4663 }
4664 /* AF095904.1
4665 if (qvp [SCQUAL_plastid_name].ssp != NULL &&
4666 qvp [SCQUAL_organelle].num == GENOME_plastid) {
4667 qvp [SCQUAL_organelle].num = 0;
4668 }
4669 */
4670 if (qvp [SCQUAL_transposon_name].ssp != NULL &&
4671 qvp [SCQUAL_organelle].num == GENOME_transposon) {
4672 qvp [SCQUAL_organelle].num = 0;
4673 }
4674
4675 if (sfp != NULL) {
4676 qvp [SCQUAL_seqfeat_note].str = sfp->comment;
4677 }
4678
4679 if (qvp [SCQUAL_fwd_primer_name].ssp != NULL ||
4680 qvp [SCQUAL_fwd_primer_seq].ssp != NULL ||
4681 qvp [SCQUAL_rev_primer_name].ssp != NULL ||
4682 qvp [SCQUAL_rev_primer_seq].ssp != NULL) {
4683 qvp [SCQUAL_PCR_primers].ble = TRUE;
4684 qvp [SCQUAL_PCR_primer_note].ble = TRUE;
4685 }
4686
4687 if (biop->pcr_primers != NULL) {
4688 qvp [SCQUAL_PCR_reaction].prp = biop->pcr_primers;
4689 }
4690
4691 if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
4692 /* leave metagenome_source as a separate qualifier */
4693 } else {
4694 /* move metagenome_source to note */
4695 qvp [SCQUAL_metagenome_note].omp = qvp [SCQUAL_metagenome_source].omp;
4696 qvp [SCQUAL_metagenome_source].omp = NULL;
4697 }
4698
4699 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
4700 if (qvp [SCQUAL_altitude].ssp != NULL) {
4701 ssp = qvp [SCQUAL_altitude].ssp;
4702 if (! AltitudeIsValid (ssp->name)) {
4703 qvp [SCQUAL_altitude].ssp = NULL;
4704 }
4705 }
4706 if (qvp [SCQUAL_type_material].omp != NULL) {
4707 ssp = qvp [SCQUAL_type_material].ssp;
4708 if (! TypeMaterialIsValid (ssp->name)) {
4709 qvp [SCQUAL_type_material].ssp = NULL;
4710 }
4711 }
4712 }
4713
4714 #if 0
4715 if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
4716 /* leave mating_type as a separate qualifier */
4717 } else if (qvp [SCQUAL_sex].ssp == NULL && qvp [SCQUAL_mating_type].ssp != NULL) {
4718 /* move mating_type to sex if available */
4719 qvp [SCQUAL_sex].ssp = qvp [SCQUAL_mating_type].ssp;
4720 qvp [SCQUAL_mating_type].ssp = NULL;
4721 }
4722 #endif
4723
4724 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
4725 if (sdp != NULL) {
4726 mip = (MolInfoPtr) sdp->data.ptrvalue;
4727 if (mip != NULL) {
4728 if (mip->tech == MI_TECH_est || mip->tech == MI_TECH_survey) {
4729 is_est_or_gss = TRUE;
4730 }
4731 }
4732 }
4733
4734 /* now print qualifiers from table */
4735
4736 qualtbl = source_qual_order;
4737 if (is_desc) {
4738 notetbl = source_desc_note_order;
4739 } else {
4740 notetbl = source_feat_note_order;
4741 }
4742
4743 for (i = 0, idx = qualtbl [i]; idx != 0; i++, idx = qualtbl [i]) {
4744
4745 lastomptype = 0;
4746 lastssptype = 0;
4747 switch (asn2gnbk_source_quals [idx].qualclass) {
4748
4749 case Qual_class_ignore :
4750 break;
4751
4752 case Qual_class_string :
4753 if (! StringHasNoText (qvp [idx].str)) {
4754 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4755 FALSE, FALSE, TILDE_IGNORE);
4756 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
4757 FALSE, FALSE, TILDE_TO_SPACES);
4758 FFAddOneChar(ffstring, '\n', FALSE);
4759 }
4760 break;
4761
4762 case Qual_class_boolean :
4763 if (qvp [idx].ble) {
4764 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
4765 FALSE, TRUE, TILDE_IGNORE);
4766 }
4767 break;
4768
4769 case Qual_class_organelle :
4770 j = (Int2) qvp [idx].num;
4771 if (j < sizeof (organelleQual) / sizeof (CharPtr)) {
4772 if (organelleQual [j] != NULL) {
4773 FFAddTextToString(ffstring, NULL, organelleQual[j], "\n",
4774 FALSE, FALSE, TILDE_IGNORE);
4775 }
4776 }
4777 break;
4778
4779 case Qual_class_orgmod :
4780 omp = qvp [idx].omp;
4781 if (lastomptype == 0 && omp != NULL) {
4782 lastomptype = omp->subtype;
4783 }
4784 while (omp != NULL && omp->subtype == lastomptype) {
4785 if (StringIsJustQuotes (omp->subname)) {
4786 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4787 FALSE, TRUE, TILDE_IGNORE);
4788 } else if (! StringHasNoText (omp->subname)) {
4789 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4790 FALSE, TRUE, TILDE_IGNORE);
4791 FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4792 FALSE, TRUE, TILDE_TO_SPACES);
4793 }
4794 omp = omp->next;
4795 }
4796 break;
4797
4798 case Qual_class_voucher :
4799 omp = qvp [idx].omp;
4800 if (lastomptype == 0 && omp != NULL) {
4801 lastomptype = omp->subtype;
4802 }
4803 while (omp != NULL && omp->subtype == lastomptype) {
4804 if (StringIsJustQuotes (omp->subname)) {
4805 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4806 FALSE, TRUE, TILDE_IGNORE);
4807 } else if (! StringHasNoText (omp->subname)) {
4808 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
4809 FALSE, TRUE, TILDE_IGNORE);
4810 FF_www_specimen_voucher(ajp, ffstring, omp->subname);
4811 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4812 }
4813 omp = omp->next;
4814 }
4815 break;
4816
4817 case Qual_class_lat_lon :
4818 omp = qvp [idx].omp;
4819 if (lastomptype == 0 && omp != NULL) {
4820 lastomptype = omp->subtype;
4821 }
4822 while (omp != NULL && omp->subtype == lastomptype) {
4823 if (StringIsJustQuotes (omp->subname)) {
4824 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4825 FALSE, TRUE, TILDE_IGNORE);
4826 } else if (! StringHasNoText (omp->subname)) {
4827 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
4828 FALSE, TRUE, TILDE_IGNORE);
4829 FF_www_lat_lon(ajp, ffstring, omp->subname);
4830 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4831 }
4832 omp = omp->next;
4833 }
4834 break;
4835
4836 case Qual_class_subsource :
4837 ssp = qvp [idx].ssp;
4838 if (lastssptype == 0 && ssp != NULL) {
4839 lastssptype = ssp->subtype;
4840 }
4841 while (ssp != NULL && ssp->subtype == lastssptype) {
4842 if (ssp->subtype == SUBSRC_germline ||
4843 ssp->subtype == SUBSRC_rearranged ||
4844 ssp->subtype == SUBSRC_transgenic ||
4845 ssp->subtype == SUBSRC_environmental_sample ||
4846 ssp->subtype == SUBSRC_metagenomic) {
4847 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
4848 FALSE, TRUE, TILDE_TO_SPACES);
4849 } else if (StringIsJustQuotes (ssp->name)) {
4850 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
4851 FALSE, TRUE, TILDE_IGNORE);
4852 } else if (! StringHasNoText (ssp->name)) {
4853 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4854 FALSE, TRUE, TILDE_IGNORE);
4855 FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
4856 FALSE, TRUE, TILDE_TO_SPACES);
4857 }
4858 ssp = ssp->next;
4859 }
4860 break;
4861
4862 case Qual_class_pcr :
4863 if (qvp [idx].ble) {
4864 lastssptype = 0;
4865 pset = ParsePCRPrimerString (qvp);
4866 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
4867 psp = (PcrSetPtr) vnp->data.ptrvalue;
4868 if (psp == NULL) continue;
4869 str = NextPCRPrimerString (psp, FALSE, (Boolean) (pset->next != NULL));
4870 if (str == NULL) continue;
4871 if (! StringHasNoText (str)) {
4872 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4873 FALSE, TRUE, TILDE_IGNORE);
4874 FFAddTextToString(ffstring, "\"", str, "\"\n",
4875 FALSE, TRUE, TILDE_TO_SPACES);
4876 }
4877 MemFree (str);
4878 }
4879 FreePCRSet (pset);
4880 }
4881 break;
4882
4883 case Qual_class_pcr_react :
4884 prp = qvp [idx].prp;
4885 while (prp != NULL) {
4886 str = NextPCRReaction (prp, FALSE, (Boolean) (prp->next != NULL));
4887 if (StringDoesHaveText (str)) {
4888 FFAddTextToString (ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
4889 FALSE, TRUE, TILDE_IGNORE);
4890 FFAddTextToString (ffstring, "\"", str, "\"\n",
4891 FALSE, TRUE, TILDE_TO_SPACES);
4892 }
4893 MemFree (str);
4894 prp = prp->next;
4895 }
4896 break;
4897
4898 case Qual_class_pubset :
4899 break;
4900
4901 case Qual_class_quote :
4902 break;
4903
4904 case Qual_class_noquote :
4905 break;
4906
4907 case Qual_class_label :
4908 break;
4909
4910 case Qual_class_db_xref :
4911 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
4912 buf [0] = '\0';
4913 dbt = (DbtagPtr) vnp->data.ptrvalue;
4914 if (dbt != NULL && (! StringHasNoText (dbt->db))) {
4915 oip = dbt->tag;
4916 if (oip != NULL) {
4917
4918 okay = TRUE;
4919 if (ajp->flags.dropBadDbxref) {
4920 /* if RELEASE_MODE, drop unknown dbtag */
4921
4922 okay = FALSE;
4923 if (DbxrefIsValid (dbt->db, &is_rf, &is_sc, &is_bc, NULL)) {
4924 if (is_bc) {
4925 /* case counts, so suppress if bad case */
4926 } else if (is_rf && (is_other || is_gps)) {
4927 /* allow refseq dbxrefs in source feature */
4928 okay = TRUE;
4929 } else if (is_sc) {
4930 /* expect it to be in legalSrcDbXrefs list */
4931 okay = TRUE;
4932 } else if (is_est_or_gss) {
4933 /* EST and GSS records only have source feature, so allow anything */
4934 okay = TRUE;
4935 } else {
4936 /* suppress regular dbxrefs, also warn in validator */
4937 }
4938 }
4939
4940 /*
4941 okay = FALSE;
4942 for (j = 0; legalDbXrefs [j] != NULL; j++) {
4943 if (StringCmp (dbt->db, legalDbXrefs [j]) == 0) {
4944 okay = TRUE;
4945 }
4946 }
4947 */
4948 }
4949
4950 if (okay) {
4951 if (! StringHasNoText (oip->str)) {
4952 if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
4953 sprintf (buf, "%s", oip->str);
4954 }
4955 } else {
4956 sprintf (buf, "%ld", (long) oip->id);
4957 }
4958 }
4959 }
4960 }
4961 if (StringDoesHaveText (buf) && dbt != NULL) {
4962 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4963 FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
4964 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4965 }
4966 }
4967 break;
4968
4969 case Qual_class_illegal :
4970 break;
4971
4972 case Qual_class_note :
4973 if (! ajp->flags.srcQualsToNote) {
4974
4975 /* in sequin_mode and dump_mode, all orgmods and subsources show up as separate /qualifiers */
4976
4977 for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4978
4979 lastomptype = 0;
4980 lastssptype = 0;
4981 switch (asn2gnbk_source_quals [jdx].qualclass) {
4982
4983 case Qual_class_orgmod :
4984 if (jdx == SCQUAL_orgmod_note) break;
4985 omp = qvp [jdx].omp;
4986 if (lastomptype == 0 && omp != NULL) {
4987 lastomptype = omp->subtype;
4988 }
4989 while (omp != NULL && omp->subtype == lastomptype) {
4990 if (StringIsJustQuotes (omp->subname)) {
4991 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4992 FALSE, TRUE, TILDE_IGNORE);
4993 } else if (! StringHasNoText (omp->subname)) {
4994 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4995 FALSE, TRUE, TILDE_IGNORE);
4996 FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4997 FALSE, TRUE, TILDE_TO_SPACES);
4998 }
4999 omp = omp->next;
5000 }
5001 break;
5002
5003 case Qual_class_voucher :
5004 if (jdx == SCQUAL_orgmod_note) break;
5005 omp = qvp [jdx].omp;
5006 if (lastomptype == 0 && omp != NULL) {
5007 lastomptype = omp->subtype;
5008 }
5009 while (omp != NULL && omp->subtype == lastomptype) {
5010 if (StringIsJustQuotes (omp->subname)) {
5011 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
5012 FALSE, TRUE, TILDE_IGNORE);
5013 } else if (! StringHasNoText (omp->subname)) {
5014 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"",
5015 FALSE, TRUE, TILDE_IGNORE);
5016 FF_www_specimen_voucher(ajp, ffstring, omp->subname);
5017 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
5018 }
5019 omp = omp->next;
5020 }
5021 break;
5022
5023 case Qual_class_subsource :
5024 if (jdx == SCQUAL_subsource_note) break;
5025 ssp = qvp [jdx].ssp;
5026 if (lastssptype == 0 && ssp != NULL) {
5027 lastssptype = ssp->subtype;
5028 }
5029 while (ssp != NULL && ssp->subtype == lastssptype) {
5030 if (ssp->subtype == SUBSRC_germline ||
5031 ssp->subtype == SUBSRC_rearranged ||
5032 ssp->subtype == SUBSRC_transgenic ||
5033 ssp->subtype == SUBSRC_environmental_sample ||
5034 ssp->subtype == SUBSRC_metagenomic) {
5035 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "\n",
5036 FALSE, TRUE, TILDE_TO_SPACES);
5037 } else if (StringIsJustQuotes (ssp->name)) {
5038 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
5039 FALSE, TRUE, TILDE_IGNORE);
5040
5041 } else if (! StringHasNoText (ssp->name)) {
5042 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
5043 FALSE, TRUE, TILDE_IGNORE);
5044 FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
5045 FALSE, TRUE, TILDE_TO_SPACES);
5046 }
5047 ssp = ssp->next;
5048 }
5049 break;
5050
5051 default :
5052 break;
5053 }
5054 }
5055 }
5056
5057 notestr = NULL;
5058 prefix = "";
5059 add_period = FALSE;
5060
5061 if (biop->genome == 8) {
5062 FFAddTextToString(unique, "", "extrachromosomal", NULL, FALSE, FALSE, TILDE_IGNORE);
5063 prefix = "\n";
5064 }
5065
5066 for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
5067
5068 lastomptype = 0;
5069 lastssptype = 0;
5070 switch (asn2gnbk_source_quals [jdx].qualclass) {
5071
5072 case Qual_class_string :
5073 if (! StringHasNoText (qvp [jdx].str)) {
5074 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, FALSE);
5075 add_period = FALSE;
5076 prefix = "\n";
5077 }
5078 break;
5079
5080 case Qual_class_orgmod :
5081 case Qual_class_voucher :
5082 if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_orgmod_note) break;
5083 omp = qvp [jdx].omp;
5084 if (lastomptype == 0 && omp != NULL) {
5085 lastomptype = omp->subtype;
5086 }
5087 while (omp != NULL && omp->subtype == lastomptype) {
5088 if (! StringHasNoText (omp->subname)) {
5089 if (jdx == SCQUAL_orgmod_note) {
5090 sprintf (buf, "%s", prefix);
5091 } else {
5092 sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
5093 }
5094
5095 str = StringSave (omp->subname);
5096 add_period = s_RemovePeriodFromEnd (str);
5097 if (jdx == SCQUAL_orgmod_note) {
5098 FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
5099 } else {
5100 FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
5101 }
5102 MemFree (str);
5103
5104 if (jdx == SCQUAL_orgmod_note) {
5105 if (add_period) {
5106 prefix = ".\n";
5107 } else {
5108 prefix = ";\n";
5109 }
5110 } else {
5111 prefix = "; ";
5112 }
5113 }
5114 omp = omp->next;
5115 }
5116 break;
5117
5118 case Qual_class_subsource :
5119 if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_subsource_note) break;
5120 ssp = qvp [jdx].ssp;
5121 if (lastssptype == 0 && ssp != NULL) {
5122 lastssptype = ssp->subtype;
5123 }
5124 while (ssp != NULL && ssp->subtype == lastssptype) {
5125 if (ssp->subtype == SUBSRC_germline ||
5126 ssp->subtype == SUBSRC_rearranged ||
5127 ssp->subtype == SUBSRC_transgenic ||
5128 ssp->subtype == SUBSRC_environmental_sample ||
5129 ssp->subtype == SUBSRC_metagenomic) {
5130 FFAddTextToString (unique, prefix, asn2gnbk_source_quals [jdx].name, NULL, FALSE, FALSE, TILDE_IGNORE);
5131 prefix = "; ";
5132 } else if (! StringHasNoText (ssp->name)) {
5133 if (jdx == SCQUAL_subsource_note) {
5134 sprintf (buf, "%s", prefix);
5135 } else {
5136 sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
5137 }
5138
5139 str = StringSave (ssp->name);
5140 add_period = s_RemovePeriodFromEnd (str);
5141 if (jdx == SCQUAL_subsource_note) {
5142 FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
5143 } else {
5144 FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
5145 }
5146 MemFree (str);
5147
5148 if (jdx == SCQUAL_subsource_note) {
5149 if (add_period) {
5150 prefix = ".\n";
5151 } else {
5152 prefix = ";\n";
5153 }
5154 } else {
5155 prefix = "; ";
5156 }
5157 }
5158 ssp = ssp->next;
5159 }
5160 break;
5161
5162 case Qual_class_pcr :
5163 if (qvp [jdx].ble) {
5164 lastssptype = 0;
5165 pset = ParsePCRPrimerString (qvp);
5166 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
5167 psp = (PcrSetPtr) vnp->data.ptrvalue;
5168 if (psp == NULL) continue;
5169 str = NextPCRPrimerString (psp, TRUE, (Boolean) (pset->next != NULL));
5170 if (str == NULL) continue;
5171 if (! StringHasNoText (str)) {
5172 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5173 add_period = FALSE;
5174 prefix = "; ";
5175 }
5176 MemFree (str);
5177 }
5178 FreePCRSet (pset);
5179 }
5180 break;
5181
5182 case Qual_class_pcr_react :
5183 prp = qvp [jdx].prp;
5184 while (prp != NULL) {
5185 str = NextPCRReaction (prp, TRUE, (Boolean) (prp->next != NULL));
5186 if (StringDoesHaveText (str)) {
5187 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5188 add_period = FALSE;
5189 prefix = "; ";
5190 }
5191 MemFree (str);
5192 prp = prp->next;
5193 }
5194 break;
5195
5196 case Qual_class_valnode :
5197 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
5198 str = (CharPtr) vnp->data.ptrvalue;
5199 if (! StringHasNoText (str)) {
5200 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
5201 add_period = FALSE;
5202 prefix = "; ";
5203 }
5204 }
5205 break;
5206
5207 default :
5208 break;
5209 }
5210 }
5211 if ( !FFEmpty(unique) ) {
5212 notestr = FFToCharPtr(unique);
5213
5214 if (add_period) {
5215 s_AddPeriodToEnd (notestr);
5216 }
5217
5218 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
5219 if (! IsEllipsis (notestr))
5220 s_RemovePeriodFromEnd (notestr);
5221 #endif
5222
5223 FFAddOneString (ffstring, "/note=\"", FALSE, FALSE, TILDE_IGNORE);
5224 if (is_desc) {
5225 /* AB055064.1 said TILDE_IGNORE on descriptors, but now changing policy */
5226 FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_IGNORE */ /* TILDE_EXPAND */ TILDE_SEMICOLON);
5227 } else {
5228 /* ASZ93724.1 said TILDE_EXPAND on features, but record does not exist */
5229 FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_EXPAND */ TILDE_SEMICOLON);
5230 }
5231 FFAddOneString (ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
5232
5233 MemFree (notestr);
5234 }
5235 break;
5236 default :
5237 break;
5238 }
5239 }
5240
5241 /* and then deal with the various note types separately (not in order table) */
5242
5243 if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && ajp->seqspans &&
5244 (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
5245 sprintf (sfx, "</span>");
5246 }
5247
5248 str = NULL;
5249
5250 if (js != NULL) {
5251 str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", js, sfx);
5252 } else {
5253 str = FFEndPrintEx (ajp, ffstring, afp->format, 21, 21, 5, 21, "FT", pfx, sfx);
5254 }
5255
5256 MemFree (js);
5257
5258 /* optionally populate gbseq for XML-ized GenBank format */
5259
5260 if (gbseq != NULL) {
5261 if (gbfeat != NULL) {
5262 AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
5263 }
5264 }
5265
5266 FFRecycleString(ajp, unique);
5267 FFRecycleString(ajp, ffstring);
5268 return str;
5269 }
5270
CountBasesByStream(CharPtr sequence,Pointer userdata)5271 static void LIBCALLBACK CountBasesByStream (
5272 CharPtr sequence,
5273 Pointer userdata
5274 )
5275
5276 {
5277 Int4Ptr base_count;
5278 Char ch;
5279 CharPtr ptr;
5280
5281 base_count = (Int4Ptr) userdata;
5282
5283 ptr = sequence;
5284 ch = *ptr;
5285 while (ch != '\0') {
5286 ch = TO_UPPER (ch);
5287 switch (ch) {
5288 case 'A' :
5289 (base_count [0])++;
5290 break;
5291 case 'C' :
5292 (base_count [1])++;
5293 break;
5294 case 'G' :
5295 (base_count [2])++;
5296 break;
5297 case 'T' :
5298 (base_count [3])++;
5299 break;
5300 default :
5301 (base_count [4])++;
5302 break;
5303 }
5304 ptr++;
5305 ch = *ptr;
5306 }
5307 }
5308
FormatBasecountBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5309 NLM_EXTERN CharPtr FormatBasecountBlock (
5310 Asn2gbFormatPtr afp,
5311 BaseBlockPtr bbp
5312 )
5313
5314 {
5315 IntAsn2gbJobPtr ajp;
5316 Asn2gbSectPtr asp;
5317 Int4 base_count [5];
5318 BioseqPtr bsp;
5319 Char buf [80];
5320 Int2 i;
5321 Int4 len;
5322 StringItemPtr ffstring;
5323 CharPtr str;
5324
5325 if (afp == NULL || bbp == NULL) return NULL;
5326 ajp = afp->ajp;
5327 if (ajp == NULL) return NULL;
5328
5329 asp = afp->asp;
5330 if (asp == NULL) return NULL;
5331 bsp = (asp->bsp);
5332 if (bsp == NULL) return NULL;
5333
5334 /* after first formatting, result is cached into bbp->string */
5335
5336 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
5337
5338 for (i = 0; i < 5; i++) {
5339 base_count [i] = 0;
5340 }
5341
5342 if (ajp->ajp.slp != NULL) {
5343 len = SeqLocLen (ajp->ajp.slp);
5344 SeqPortStreamLoc (ajp->ajp.slp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
5345 } else {
5346 len = bsp->length;
5347 SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
5348 }
5349
5350 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
5351
5352 if (base_count [4] == 0) {
5353 sprintf (buf, "%7ld a%7ld c%7ld g%7ld t",
5354 (long) base_count [0], (long) base_count [1],
5355 (long) base_count [2], (long) base_count [3]);
5356 } else {
5357 sprintf (buf, "%7ld a%7ld c%7ld g%7ld t%7ld others",
5358 (long) base_count [0], (long) base_count [1],
5359 (long) base_count [2], (long) base_count [3],
5360 (long) base_count [4]);
5361 }
5362
5363 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
5364
5365 sprintf (buf, "Sequence %ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
5366 (long) len,
5367 (long) base_count [0], (long) base_count [1],
5368 (long) base_count [2], (long) base_count [3],
5369 (long) base_count [4]);
5370 }
5371
5372 ffstring = FFGetString(ajp);
5373 if ( ffstring == NULL ) return NULL;
5374
5375 if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
5376 FFAddOneString(ffstring, "XX\n", FALSE, FALSE, TILDE_IGNORE);
5377 }
5378 FFStartPrint (ffstring, afp->format, 0, 0, "BASE COUNT", 12, 5, 5, "SQ", FALSE);
5379 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5380 str = FFEndPrint(ajp, ffstring, afp->format, 12, 0, 5, 5, "SQ");
5381 FFRecycleString(ajp, ffstring);
5382
5383 return str;
5384 }
5385
/*
 * Appends one formatted line of sequence text to ffstring.
 *
 * A single trailing blank in buf is removed in place first.
 *
 * GenBank/GenPept: right-justified 1-based start position (width 9), a
 * blank, then buf.  When ajp is set, WWW output is on and ajp->seqspans
 * is set, buf is wrapped in
 *   <span class="ff_line" id="gi_<currAccVer>_<startwithoutgap + 1>"> ... </span>
 *
 * EMBL/EMBLPept: five blanks, buf, padding computed as 72 - 5 - length
 * of buf, then stop printed with width 8.
 *
 * Other formats append nothing.  The gi parameter is not used here.
 */
static void PrintSeqLine (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  FmtType format,
  CharPtr buf,
  BIG_ID gi,
  CharPtr currAccVer,
  Int4 startwithoutgap,
  Int4 start,
  Int4 stop
)

{
  CharPtr  acc;
  size_t   len;
  Char     pos [16];
  Int4     pad;
  Char     tmp [32];
  Boolean  use_span;

  len = StringLen (buf);
  if (len > 0 && buf [len - 1] == ' ') {
    buf [len - 1] = '\0';
  }

  if (format == GENBANK_FMT || format == GENPEPT_FMT) {

    use_span = (Boolean) (ajp != NULL && GetWWW (ajp) && ajp->seqspans);

    sprintf (pos, "%9ld", (long) (start + 1));
    FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
    FFAddOneChar(ffstring, ' ', FALSE);
    if (use_span) {
      /*
       * The opening tag is appended in pieces so that the caller-supplied
       * accession never has to fit into a fixed-size local buffer (it was
       * previously sprintf'ed into 64 bytes together with the markup).
       * Only the bounded numeric tail is formatted locally.
       */
      acc = (currAccVer != NULL) ? currAccVer : "";
      FFAddOneString(ffstring, "<span class=\"ff_line\" id=\"gi_", FALSE, FALSE, TILDE_TO_SPACES);
      FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_TO_SPACES);
      sprintf (tmp, "_%ld\">", (long) (startwithoutgap + 1));
      FFAddOneString(ffstring, tmp, FALSE, FALSE, TILDE_TO_SPACES);
    }
    FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
    if (use_span) {
      FFAddOneString(ffstring, "</span>", FALSE, FALSE, TILDE_TO_SPACES);
    }
    FFAddOneChar(ffstring, '\n', FALSE);

  } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {

    sprintf (pos, "%8ld", (long) (stop));
    FFAddNChar(ffstring, ' ', 5, FALSE);
    FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
    pad = 72 - 5 - StringLen(buf);
    FFAddNChar(ffstring, ' ', pad, FALSE);
    FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
    FFAddOneChar(ffstring, '\n', FALSE);
  }
}
5434
/*
 * Squeezes every non-alphabetic character out of str, in place.
 *
 * Returns str after compaction, or NULL when str is NULL or empty.
 */
static CharPtr CompressNonBases (CharPtr str)

{
  Char     letter;
  CharPtr  reader;
  CharPtr  writer;

  if (str == NULL) return NULL;
  if (*str == '\0') return NULL;

  /* writer never runs ahead of reader, so the copy is safe in place */
  writer = str;
  for (reader = str; *reader != '\0'; reader++) {
    letter = *reader;
    if (! IS_ALPHA (letter)) continue;
    *writer = letter;
    writer++;
  }
  *writer = '\0';

  return str;
}
5459
5460 static Uint1 fasta_order [NUM_SEQID] = {
5461 33, /* 0 = not set */
5462 20, /* 1 = local Object-id */
5463 15, /* 2 = gibbsq */
5464 16, /* 3 = gibbmt */
5465 30, /* 4 = giim Giimport-id */
5466 10, /* 5 = genbank */
5467 10, /* 6 = embl */
5468 10, /* 7 = pir */
5469 10, /* 8 = swissprot */
5470 15, /* 9 = patent */
5471 10, /* 10 = other = refseq */
5472 20, /* 11 = general Dbtag */
5473 255, /* 12 = gi */
5474 10, /* 13 = ddbj */
5475 10, /* 14 = prf */
5476 12, /* 15 = pdb */
5477 10, /* 16 = tpg */
5478 10, /* 17 = tpe */
5479 10, /* 18 = tpd */
5480 15, /* 19 = gpp */
5481 15 /* 20 = nat */
5482 };
5483
/*
 * Appends a comma-separated list of "accession:from..to" pieces for the
 * SeqLoc chain starting at slp_head to ffstring.
 *
 * - SEQLOC_NULL elements are written as ",gap()".
 * - Only SEQLOC_INT, SEQLOC_PNT and SEQLOC_WHOLE elements that have a
 *   Seq-id are written as intervals; everything else is skipped.
 * - For gi Seq-ids the accession.version is requested through
 *   GetAccnVerFromServer; if that fails the id from GetSeqIdForGI is used.
 *   With segWithParts set, a gibbsq/gibbmt/giim id that resolves to a
 *   virtual Bioseq is written as ",gap(length)" or ",gap()".
 * - Minus-strand intervals are wrapped in "complement(...)".
 * - In WWW mode, when an accession could be obtained for the Bioseq found
 *   for the current element, the identifier is emitted as a hyperlink
 *   (nucleotide or protein base URL chosen by is_na).
 *
 * prefix, when not NULL, is written before every interval element.
 */
static void PrintGenome (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  SeqLocPtr slp_head,
  CharPtr prefix,
  Boolean segWithParts,
  Boolean is_na
)
{
  Char       buf [128], vbuf [128];
  Boolean    first = TRUE;
  SeqIdPtr   freeid = NULL, sid = NULL, newid = NULL;
  SeqLocPtr  slp = NULL;
  Int4       start = 0, stop = 0;
  BIG_ID     gi = 0;
  Char       currAccVer [SEQID_MAX_LEN];
  BioseqPtr  bsp = NULL;

  buf [0] = '\0';
  vbuf [0] = '\0';
  for (slp = slp_head; slp; slp = slp->next) {
    sid = SeqLocId (slp);
    if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_PNT || slp->choice == SEQLOC_WHOLE) {
      start = SeqLocStart (slp);
      stop = SeqLocStop (slp);
    } else if (slp->choice == SEQLOC_NULL) {
      sprintf (vbuf, ",%s", "gap()");
      FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
      continue;
    } else {
      continue;
    }
    if (sid == NULL) {
      continue;
    }

    /* per-element state; bsp must not carry over from an earlier element,
       otherwise its accession would become the link target for this one */
    newid = NULL;
    freeid = NULL;
    bsp = NULL;
    buf [0] = '\0';
    gi = 0;

    if (sid->choice == SEQID_GI) {
      gi = (BIG_ID) sid->data.intvalue;
      if (! GetAccnVerFromServer (gi, buf)) {
        newid = GetSeqIdForGI (gi);
        if (newid != NULL) {
          /* remembered so it is released once this element is done */
          freeid = newid;
        }
        if (newid != NULL && segWithParts) {
          if (newid->choice == SEQID_GIBBSQ ||
              newid->choice == SEQID_GIBBMT ||
              newid->choice == SEQID_GIIM) {
            bsp = BioseqFind (newid);
            if (bsp != NULL && bsp->repr == Seq_repr_virtual) {
              if (bsp->length > 0) {
                sprintf (vbuf, ",gap(%ld)", (long) bsp->length);
              } else {
                sprintf (vbuf, ",%s", "gap()");
              }
              FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
              /* release the looked-up id before leaving this element early */
              freeid = SeqIdFree (freeid);
              continue;
            }
          }
        }
      }
    } else if (sid->choice == SEQID_GENERAL) {
      newid = sid;
    } else {
      newid = sid;
      /* result is not consulted below; call kept as in the original flow */
      gi = GetGIForSeqId (sid);
    }

    if (prefix != NULL) {
      FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
    }
    if (first) {
      first = FALSE;
    } else {
      FFAddOneChar (ffstring, ',', FALSE);
    }

    if (! StringHasNoText (buf)) {
      /* filled in by GetAccnVerFromServer */
    } else if (newid != NULL) {
      SeqIdWrite (SeqIdSelect (newid, fasta_order, NUM_SEQID),
                  buf, PRINTID_TEXTID_ACC_VER, sizeof(buf) -1 );
    } else if (sid->choice == SEQID_GI) {
      SeqIdWrite (sid, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
    }

    if (SeqLocStrand (slp) == Seq_strand_minus) {
      FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
    }

    currAccVer [0] = '\0';
    if (bsp != NULL) {
      GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, FALSE);
    }

    if ( GetWWW (ajp) && StringDoesHaveText (currAccVer)) {
      if (newid == NULL) {
        newid = sid;
      }
      if (newid->choice != SEQID_GENERAL) {
        FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
        if (is_na) {
          FF_Add_NCBI_Base_URL (ffstring, link_seqn);
        } else {
          FF_Add_NCBI_Base_URL (ffstring, link_seqp);
        }
        FFAddTextToString (ffstring, NULL, currAccVer, "\">", FALSE, FALSE, TILDE_IGNORE);
        FFAddTextToString (ffstring, NULL, buf, "</a>", FALSE, FALSE, TILDE_IGNORE);
      } else {
        /* no link for general ids, but the identifier text is still written */
        FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
      }
    } else {
      FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
    }

    if (SeqLocStrand (slp) == Seq_strand_minus) {
      sprintf (vbuf,":%ld..%ld)", (long) start+1, (long) stop+1);
    } else {
      sprintf (vbuf,":%ld..%ld", (long) start+1, (long) stop+1);
    }
    FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);

    if (freeid != NULL) {
      freeid = SeqIdFree (freeid);
    }
  }
}
5613
/*
 * RevCompDelta
 *
 * Builds a new delta chain that lists the components of seq_ext in reverse
 * order, for display of a delta sequence on the opposite strand.
 *
 *   - choice 1 (SeqLoc): SEQLOC_NULL is copied as a fresh SEQLOC_NULL;
 *     SEQLOC_INT is rebuilt on the same Seq-id with from/to exchanged unless
 *     the interval is already on the minus strand, and with the 5'/3' partial
 *     flags passed in exchanged positions.  Other SeqLoc types are skipped.
 *   - choice 2 (SeqLit): a literal without seq_data is copied (length and
 *     seq_data_type only, fuzz is not copied).  A NULL literal, or one that
 *     carries sequence data, cannot be handled: everything built so far is
 *     released and NULL is returned.
 *
 * Returns the head of the newly allocated chain (owned by the caller, to be
 * released node by node with DeltaSeqFree), or NULL on the abort path or
 * when nothing was copied.
 */
static DeltaSeqPtr RevCompDelta (
  DeltaSeqPtr seq_ext
)

{
  ValNodePtr   doomed;
  DeltaSeqPtr  dsp;
  ValNodePtr   head = NULL;
  Int4         from, to, tmp;
  SeqLocPtr    nslp, slp;
  Boolean      partial5, partial3;
  SeqIntPtr    sintp;
  SeqLitPtr    slitp, slip;
  ValNodePtr   vnp;

  for (dsp = seq_ext; dsp != NULL; dsp = dsp->next) {
    vnp = NULL;

    if (dsp->choice == 1) {

      slp = (SeqLocPtr) dsp->data.ptrvalue;
      if (slp != NULL) {

        if (slp->choice == SEQLOC_NULL) {

          nslp = ValNodeAddPointer (NULL, SEQLOC_NULL, NULL);

          vnp = ValNodeAddPointer (NULL, 1, nslp);

        } else if (slp->choice == SEQLOC_INT) {

          sintp = (SeqIntPtr) slp->data.ptrvalue;
          if (sintp != NULL) {
            CheckSeqLocForPartial (slp, &partial5, &partial3);
            from = sintp->from;
            to = sintp->to;
            /* NOTE(review): presumably from > to tells AddIntervalToLocation
               to build a minus-strand interval - confirm against its
               definition, which is outside this view */
            if (sintp->strand != Seq_strand_minus) {
              tmp = from;
              from = to;
              to = tmp;
            }
            nslp = AddIntervalToLocation (NULL, sintp->id, from, to, partial3, partial5);

            vnp = ValNodeAddPointer (NULL, 1, nslp);

          }
        }
      }

    } else if (dsp->choice == 2) {

      slitp = (SeqLitPtr) dsp->data.ptrvalue;
      if (slitp != NULL && slitp->seq_data == NULL) {
        slip = SeqLitNew ();
        if (slip != NULL) {
          slip->length = slitp->length;
          /* not copying fuzz */
          slip->seq_data_type = slitp->seq_data_type;
          vnp = ValNodeAddPointer (NULL, 2, (Pointer) slip);
        }
      } else {
        /* give up: release the partial result one node at a time with
           DeltaSeqFree so that the SeqLoc / SeqLit payloads are freed too
           (ValNodeFree would release only the list nodes) */
        while (head != NULL) {
          doomed = head;
          head = head->next;
          doomed->next = NULL;
          DeltaSeqFree ((DeltaSeqPtr) doomed);
        }
        return NULL;
      }
    }

    /* save in new list in reverse order */

    if (vnp != NULL) {
      vnp->next = head;
      head = vnp;
    }
  }

  return head;
}
5689
FormatContigBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)5690 NLM_EXTERN CharPtr FormatContigBlock (
5691 Asn2gbFormatPtr afp,
5692 BaseBlockPtr bbp
5693 )
5694
5695 {
5696 IntAsn2gbJobPtr ajp;
5697 Asn2gbSectPtr asp;
5698 BioseqPtr bsp;
5699 DeltaSeqPtr delta_head = NULL;
5700 DeltaSeqPtr dsp;
5701 DeltaSeqPtr dspnext;
5702 IntFuzzPtr fuzz;
5703 GBSeqPtr gbseq;
5704 Boolean is_na;
5705 SeqLitPtr litp;
5706 DeltaSeqPtr new_delta = NULL;
5707 CharPtr prefix = NULL;
5708 Boolean rev_comp = FALSE;
5709 Boolean segWithParts = FALSE;
5710 SeqIntPtr sintp;
5711 SeqLocPtr slp;
5712 SeqLocPtr slp_head = NULL;
5713 CharPtr str;
5714 Char tmp [16];
5715 Boolean unknown;
5716 Char vbuf [32];
5717 StringItemPtr ffstring;
5718 /* CharPtr label;*/
5719
5720 if (afp == NULL || bbp == NULL) return NULL;
5721 ajp = afp->ajp;
5722 if (ajp == NULL) return NULL;
5723 asp = afp->asp;
5724 if (asp == NULL) return NULL;
5725 bsp = (asp->bsp);
5726 if (bsp == NULL) return NULL;
5727
5728 ffstring = FFGetString (ajp);
5729 if ( ffstring == NULL ) return NULL;
5730
5731 is_na = ISA_na (bsp->mol);
5732
5733 if (ajp->ajp.slp != NULL) {
5734 slp = ajp->ajp.slp;
5735 if (slp->choice == SEQLOC_INT) {
5736 sintp = (SeqIntPtr) slp->data.ptrvalue;
5737 if (sintp != NULL) {
5738 if (sintp->from == 0 && sintp->to == bsp->length - 1 && sintp->strand == Seq_strand_minus) {
5739 rev_comp = TRUE;
5740 }
5741 }
5742 }
5743 }
5744
5745 FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE);
5746 /*
5747 if ( GetWWW(ajp) ) {
5748 label = "CONTIG ";
5749 } else {
5750 label = "CONTIG";
5751 }
5752
5753 FFAddOneString(ffstring, label, FALSE, FALSE, TILDE_IGNORE);
5754 FFAddNChar(ffstring, ' ', 12 - StringLen(label), FALSE);
5755 */
5756
5757 FFAddOneString (ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
5758
5759 if (bsp->seq_ext_type == 1) {
5760
5761 if (bsp->repr == Seq_repr_seg && SegHasParts (bsp)) {
5762 segWithParts = TRUE;
5763 }
5764
5765 slp_head = (SeqLocPtr) bsp->seq_ext;
5766 PrintGenome (ajp, ffstring, slp_head, prefix, segWithParts, is_na);
5767
5768 } else if (bsp->seq_ext_type == 4) {
5769
5770 if (rev_comp) {
5771 new_delta = RevCompDelta ((DeltaSeqPtr) bsp->seq_ext);
5772 delta_head = new_delta;
5773 } else {
5774 delta_head = (DeltaSeqPtr) bsp->seq_ext;
5775 }
5776
5777 for (dsp = delta_head; dsp != NULL; dsp = dsp->next) {
5778 if (dsp->choice == 1) {
5779
5780 slp_head = (SeqLocPtr) dsp->data.ptrvalue;
5781 PrintGenome (ajp, ffstring, slp_head, prefix, FALSE, is_na);
5782
5783 } else {
5784
5785 litp = (SeqLitPtr) dsp->data.ptrvalue;
5786 if (litp != NULL) {
5787 if (litp->seq_data != NULL && litp->seq_data_type != Seq_code_gap) {
5788 if (litp->length == 0) {
5789 sprintf (vbuf, "gap(%ld)", (long) litp->length);
5790 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
5791 } else {
5792 /* don't know what to do here */
5793 }
5794 } else {
5795 unknown = FALSE;
5796 fuzz = litp->fuzz;
5797 if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
5798 unknown = TRUE;
5799 }
5800 if (unknown && litp->length > 0) {
5801 sprintf (tmp, "unk%ld", (long) litp->length);
5802 } else {
5803 sprintf (tmp, "%ld", (long) litp->length);
5804 }
5805 if (prefix != NULL) {
5806 sprintf (vbuf, "%sgap(%s)", prefix, tmp);
5807 } else {
5808 sprintf (vbuf, "gap(%s)", tmp);
5809 }
5810 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
5811 }
5812 }
5813 }
5814
5815 prefix = ",";
5816 }
5817
5818 } else if (bsp->seq_ext_type == 2) {
5819
5820 slp = (SeqLocPtr) bsp->seq_ext;
5821 PrintGenome (ajp, ffstring, slp, prefix, segWithParts, is_na);
5822
5823 }
5824
5825 FFAddOneChar (ffstring, ')', FALSE);
5826
5827 str = FFEndPrint (ajp, ffstring, afp->format, 12, 12, 5, 5, "CO");
5828 FFRecycleString (ajp, ffstring);
5829
5830 /* optionally populate gbseq for XML-ized GenBank format */
5831
5832 if (ajp->gbseq) {
5833 gbseq = &asp->gbseq;
5834 } else {
5835 gbseq = NULL;
5836 }
5837
5838 if (gbseq != NULL) {
5839 if (StringLen (str) > 12) {
5840 gbseq->contig = StringSave (str + 12);
5841 } else {
5842 gbseq->contig = StringSave (str);
5843 }
5844
5845 CleanQualValue (gbseq->contig);
5846 Asn2gnbkCompressSpaces (gbseq->contig);
5847 StripAllSpaces (gbseq->contig);
5848 }
5849
5850 if (new_delta != NULL) {
5851 dsp = new_delta;
5852 while (dsp != NULL) {
5853 dspnext = dsp->next;
5854 dsp->next = NULL;
5855 DeltaSeqFree (dsp);
5856 dsp = dsp->next;
5857 }
5858 }
5859
5860 return str;
5861 }
5862
SaveGBSeqSequence(CharPtr sequence,Pointer userdata)5863 static void LIBCALLBACK SaveGBSeqSequence (
5864 CharPtr sequence,
5865 Pointer userdata
5866 )
5867
5868 {
5869 CharPtr tmp;
5870 CharPtr PNTR tmpp;
5871
5872 tmpp = (CharPtr PNTR) userdata;
5873 tmp = *tmpp;
5874
5875 tmp = StringMove (tmp, sequence);
5876
5877 *tmpp = tmp;
5878 }
5879
InGapBlock(IntAsn2gbJobPtr ajp)5880 static Boolean InGapBlock (
5881 IntAsn2gbJobPtr ajp
5882 )
5883
5884 {
5885 return (Boolean) (ajp->seqGapCurrLen > 0);
5886 }
5887
LineIsAllGaps(CharPtr ptr)5888 static Boolean LineIsAllGaps (
5889 CharPtr ptr
5890 )
5891
5892 {
5893 Char ch;
5894 Int2 j;
5895
5896 for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5897 if (ch != '-') return FALSE;
5898 }
5899 if (j == 60) return TRUE;
5900 return FALSE;
5901 }
5902
GapAtStart(CharPtr ptr)5903 static Int2 GapAtStart (
5904 CharPtr ptr
5905 )
5906
5907 {
5908 Char ch;
5909 Int2 j;
5910
5911 for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5912 if (ch != '-') return j;
5913 }
5914 return j;
5915 }
5916
/*
 * FixGapAtStart
 *
 * Overwrites the run of '-' characters at the beginning of ptr (at most 60
 * of them) with the pad character.
 */
static void FixGapAtStart (
  CharPtr ptr,
  Char pad
)

{
  Int2  pos;

  for (pos = 0; pos < 60 && ptr [pos] == '-'; pos++) {
    ptr [pos] = pad;
  }
}
5930
GapAtEnd(CharPtr ptr)5931 static Int2 GapAtEnd (
5932 CharPtr ptr
5933 )
5934
5935 {
5936 Char ch;
5937 Int2 j;
5938 Int2 k;
5939
5940 for (ch = *ptr, j = 0, k = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
5941 if (ch == '-') {
5942 k++;
5943 } else {
5944 k = 0;
5945 }
5946 }
5947 return k;
5948 }
5949
/*
 * FixGapAtEnd
 *
 * Overwrites the trailing run of '-' characters in ptr with the pad
 * character.  The start of the run is located as the string length minus
 * the count reported by GapAtEnd; replacement stops at column 60.
 */
static void FixGapAtEnd (
  CharPtr ptr,
  Char pad
)

{
  CharPtr  cur;
  Int2     col;

  col = StringLen (ptr) - GapAtEnd (ptr);
  cur = ptr + col;
  while (col < 60 && *cur == '-') {
    *cur = pad;
    cur++;
    col++;
  }
}
5965
/*
 * FixRemainingGaps
 *
 * Replaces every '-' found in the first 60 characters of ptr (or up to the
 * terminator, if that comes first) with the pad character.
 */
static void FixRemainingGaps (
  CharPtr ptr,
  Char pad
)

{
  Int2  pos;

  for (pos = 0; pos < 60 && ptr [pos] != '\0'; pos++) {
    if (ptr [pos] == '-') {
      ptr [pos] = pad;
    }
  }
}
5981
/*
 * ExpandSeqLine
 *
 * Rewrites buf in place so that a single space follows every group of ten
 * characters, except that no space is added once 60 characters have been
 * copied.  The text is first copied into a local 80-character scratch
 * array, so buf must hold a string that fits there and must have room for
 * the inserted spaces.
 */
static void ExpandSeqLine (
  CharPtr buf
)

{
  Char     copy [80];
  CharPtr  dst;
  Int2     inGroup = 0;
  CharPtr  src;
  Int2     total = 0;

  StringCpy (copy, buf);

  dst = buf;
  for (src = copy; *src != '\0'; src++) {
    *dst = *src;
    dst++;

    inGroup++;
    total++;
    if (inGroup >= 10 && total < 60) {
      /* close the group of ten with a separator */
      *dst = ' ';
      dst++;
      inGroup = 0;
    }
  }

  *dst = '\0';
}
6020
ProcessGapSpecialFormat(Asn2gbFormatPtr afp,IntAsn2gbJobPtr ajp,BioseqPtr bsp,StringItemPtr ffstring,CharPtr buf,CharPtr nextchars)6021 static Int2 ProcessGapSpecialFormat (
6022 Asn2gbFormatPtr afp,
6023 IntAsn2gbJobPtr ajp,
6024 BioseqPtr bsp,
6025 StringItemPtr ffstring,
6026 CharPtr buf,
6027 CharPtr nextchars
6028 )
6029
6030 {
6031 Char fmt_buf [64];
6032 Char gapbuf [80];
6033 /*
6034 BIG_ID gi;
6035 Char gi_buf [32];
6036 */
6037 Char currAccVer [SEQID_MAX_LEN];
6038 Boolean is_na;
6039 Char pad;
6040 Char rgn_buf [64];
6041 SeqIdPtr sip;
6042 SeqLocPtr slp;
6043 Int2 startgapgap = 0, endgap = 0;
6044 Int4 from, to;
6045
6046 is_na = ISA_na (bsp->mol);
6047 if (is_na) {
6048 pad = 'n';
6049 } else {
6050 pad = 'x';
6051 }
6052
6053 if (LineIsAllGaps (buf)) {
6054 ajp->seqGapCurrLen += StringLen (buf);
6055 *buf = '\0';
6056 return 0;
6057 }
6058
6059 startgapgap = GapAtStart (buf);
6060 if (InGapBlock (ajp)) {
6061 ajp->seqGapCurrLen += startgapgap;
6062 if (is_na) {
6063 sprintf (gapbuf, " [gap %ld bp]", (long) ajp->seqGapCurrLen);
6064 } else {
6065 sprintf (gapbuf, " [gap %ld aa]", (long) ajp->seqGapCurrLen);
6066 }
6067 FFAddOneString (ffstring, gapbuf, FALSE, FALSE, TILDE_TO_SPACES);
6068 if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && afp != NULL &&
6069 (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
6070 /*
6071 gi = 0;
6072 for (sip = bsp->id; sip != NULL; sip = sip->next) {
6073 if (sip->choice == SEQID_GI) {
6074 gi = (BIG_ID) sip->data.intvalue;
6075 }
6076 }
6077 */
6078 currAccVer [0] = '\0';
6079 GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, FALSE);
6080 if (StringDoesHaveText (currAccVer)) {
6081 /* sprintf(gi_buf, "%ld", (long) gi); */
6082 sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
6083 if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
6084 StringCat (fmt_buf, "&report=gbwithparts");
6085 if (ajp->ajp.slp != NULL) {
6086 slp = ajp->ajp.slp;
6087 from = SeqLocStart (slp) + 1;
6088 to = SeqLocStop (slp) + 1;
6089 sprintf (rgn_buf, "&from=%ld&to=%ld", (long) from, (long) to);
6090 StringCat (fmt_buf, rgn_buf);
6091 }
6092 }
6093 FFAddOneString (ffstring, " <a href=\"", FALSE, FALSE, TILDE_IGNORE);
6094 if (is_na) {
6095 FF_Add_NCBI_Base_URL (ffstring, link_featn);
6096 } else {
6097 FF_Add_NCBI_Base_URL (ffstring, link_featp);
6098 }
6099 FFAddOneString (ffstring, currAccVer, FALSE, FALSE, TILDE_IGNORE);
6100 FFAddOneString (ffstring, fmt_buf, FALSE, FALSE, TILDE_IGNORE);
6101 FFAddOneString (ffstring, "\">Expand Ns", FALSE, FALSE, TILDE_IGNORE);
6102 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
6103 }
6104 }
6105 FFAddOneChar (ffstring, '\n', FALSE);
6106 ajp->seqGapCurrLen = 0;
6107 FixGapAtStart (buf, ' ');
6108 } else if (startgapgap > 0) {
6109 FixGapAtStart (buf, pad);
6110 startgapgap = 0;
6111 }
6112
6113 endgap = GapAtEnd (buf);
6114 if (LineIsAllGaps (nextchars)) {
6115 FixGapAtEnd (buf, ' ');
6116 ajp->seqGapCurrLen += endgap;
6117 } else if (endgap > 0) {
6118 /*
6119 FixGapAtEnd (buf, pad);
6120 */
6121 FixGapAtEnd (buf, ' ');
6122 ajp->seqGapCurrLen += endgap;
6123 }
6124
6125 FixRemainingGaps (buf, pad);
6126
6127 return startgapgap;
6128 }
6129
6130 /*
6131 static void ChangeOandJtoX (CharPtr str)
6132
6133 {
6134 Char ch;
6135
6136 if (str == NULL) return;
6137 ch = *str;
6138 while (ch != '\0') {
6139 if (ch == 'O' || ch == 'J') {
6140 *str = 'X';
6141 } else if (ch == 'o' || ch == 'j') {
6142 *str = 'x';
6143 }
6144 str++;
6145 ch = *str;
6146 }
6147 }
6148 */
6149
FormatSequenceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6150 NLM_EXTERN CharPtr FormatSequenceBlock (
6151 Asn2gbFormatPtr afp,
6152 BaseBlockPtr bbp
6153 )
6154
6155 {
6156 IntAsn2gbJobPtr ajp;
6157 Asn2gbSectPtr asp;
6158 Int2 blk;
6159 BioseqPtr bsp;
6160 Bioseq bsq;
6161 Char buf [80];
6162 Char ch;
6163 Int2 count;
6164 Int4 extend;
6165 StreamFlgType flags = STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL;
6166 GBSeqPtr gbseq;
6167 BIG_ID gi = 0;
6168 IntAsn2gbSectPtr iasp;
6169 Int2 lin;
6170 SeqLocPtr loc;
6171 Int4 num;
6172 CharPtr ptr;
6173 Int4 remaining;
6174 SeqBlockPtr sbp;
6175 SeqIdPtr sip;
6176 SeqLoc sl;
6177 SeqLocPtr slp;
6178 Int4 start;
6179 Int2 startgapgap;
6180 Int4 stop;
6181 CharPtr str = NULL;
6182 CharPtr tmp;
6183 StringItemPtr ffstring;
6184 Char currAccVer [SEQID_MAX_LEN];
6185
6186 if (afp == NULL || bbp == NULL) return NULL;
6187 sbp = (SeqBlockPtr) bbp;
6188 ajp = afp->ajp;
6189 if (ajp == NULL) return NULL;
6190 asp = afp->asp;
6191 if (asp == NULL) return NULL;
6192 iasp = (IntAsn2gbSectPtr) asp;
6193 bsp = (asp->bsp);
6194 if (bsp == NULL) return NULL;
6195
6196 /* if GBSeq XML, use SeqPortStream on single block */
6197
6198 if (ajp->gbseq) {
6199 gbseq = &asp->gbseq;
6200
6201 if (ajp->ajp.slp != NULL) {
6202 slp = ajp->ajp.slp;
6203 str = MemNew (sizeof (Char) * (SeqLocLen (slp) + 10));
6204 } else {
6205 str = MemNew (sizeof (Char) * (bsp->length + 10));
6206 }
6207 if (str == NULL) return NULL;
6208
6209 tmp = str;
6210 if (ajp->ajp.slp != NULL) {
6211 slp = ajp->ajp.slp;
6212 SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
6213 } else {
6214 SeqPortStream (bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
6215 }
6216 /*
6217 if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
6218 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6219 ChangeOandJtoX (str);
6220 }
6221 }
6222 */
6223 gbseq->sequence = StringSave (str);
6224
6225 tmp = gbseq->sequence;
6226 if (tmp == NULL) return NULL;
6227 ch = *tmp;
6228 while (ch != '\0') {
6229 if (ch == '\n' || ch == '\r' || ch == '\t') {
6230 *tmp = ' ';
6231 } else if (IS_UPPER (ch)) {
6232 /* collab decision to present target sequence in lower case */
6233 *tmp = TO_LOWER (ch);
6234 }
6235 tmp++;
6236 ch = *tmp;
6237 }
6238 TrimSpacesAroundString (gbseq->sequence);
6239 CompressNonBases (gbseq->sequence);
6240
6241 return str;
6242 }
6243
6244 /* replace SeqPort with improved SeqPortStream */
6245
6246 if (sbp->bases == NULL) {
6247 if (ajp->specialGapFormat) {
6248 flags = EXPAND_GAPS_TO_DASHES | STREAM_CORRECT_INVAL;
6249 }
6250
6251 start = sbp->start;
6252 stop = sbp->stop;
6253 extend = sbp->extend;
6254
6255 if (stop > start) {
6256
6257 str = MemNew (sizeof (Char) * (extend - start + 3));
6258 if (str != NULL) {
6259 if (ajp->ajp.slp != NULL) {
6260 slp = ajp->ajp.slp;
6261 MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
6262 MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
6263 bsq.repr = Seq_repr_seg;
6264 bsq.mol = bsp->mol;
6265 bsq.seq_ext_type = 1;
6266 bsq.length = SeqLocLen (slp);
6267 bsq.seq_ext = &sl;
6268 if (slp->choice == SEQLOC_MIX || slp->choice == SEQLOC_PACKED_INT) {
6269 loc = (SeqLocPtr) slp->data.ptrvalue;
6270 if (loc != NULL) {
6271 sl.choice = loc->choice;
6272 sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
6273 sl.next = loc->next;
6274 }
6275 } else {
6276 sl.choice = slp->choice;
6277 sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
6278 sl.next = NULL;
6279 }
6280 SeqPortStreamInt (&bsq, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
6281 } else {
6282 num = SeqPortStreamInt (bsp, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
6283 if (num < 1) {
6284 /* flag possible inconsistency between bsp->length and actual sequence data length */
6285 ajp->relModeError = TRUE;
6286 return NULL;
6287 }
6288 }
6289 /*
6290 if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
6291 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6292 ChangeOandJtoX (str);
6293 }
6294 }
6295 */
6296 sbp->bases = str;
6297 }
6298 }
6299 }
6300
6301 if (sbp->bases == NULL) return NULL;
6302
6303 for (sip = bsp->id; sip != NULL; sip = sip->next) {
6304 if (sip->choice != SEQID_GI) continue;
6305 gi = (BIG_ID) sip->data.intvalue;
6306 }
6307
6308 currAccVer [0] = '\0';
6309 GetAccVerForBioseq (bsp, currAccVer, sizeof (currAccVer), ajp->hideGI, TRUE);
6310
6311 /* format subsequence cached with SeqPortStream */
6312
6313 ffstring = FFGetString (ajp);
6314
6315 start = sbp->start;
6316 stop = sbp->stop;
6317 remaining = stop - start;
6318
6319 count = 0;
6320 blk = 0;
6321 lin = 0;
6322
6323 ptr = sbp->bases;
6324 ch = *ptr;
6325
6326 while (ch != '\0' && remaining > 0) {
6327 buf [count] = (Char) (TO_LOWER (ch));
6328 count++;
6329 remaining--;
6330 ptr++;
6331 ch = *ptr;
6332
6333 blk++;
6334 lin++;
6335 if (lin >= 60) {
6336
6337 buf [count] = '\0';
6338 startgapgap = 0;
6339 if (ajp->specialGapFormat) {
6340 startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
6341 }
6342 if (StringDoesHaveText (buf)) {
6343 ExpandSeqLine (buf);
6344 PrintSeqLine (ajp, ffstring, afp->format, buf, gi, currAccVer, start, start + startgapgap, start + lin);
6345 }
6346 count = 0;
6347 blk = 0;
6348 lin = 0;
6349 start += 60;
6350 }
6351 }
6352
6353 buf [count] = '\0';
6354 if (count > 0) {
6355 startgapgap = 0;
6356 if (ajp->specialGapFormat) {
6357 startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
6358 }
6359 if (StringDoesHaveText (buf)) {
6360 ExpandSeqLine (buf);
6361 PrintSeqLine (ajp, ffstring, afp->format, buf, gi, currAccVer, start, start + startgapgap, start + lin);
6362 }
6363 }
6364
6365 str = FFToCharPtr(ffstring);
6366
6367 FFRecycleString (ajp, ffstring);
6368 return str;
6369 }
6370
6371 /*
6372 static CharPtr insd_strd [4] = {
6373 NULL, "single", "double", "mixed"
6374 };
6375
6376 static CharPtr insd_mol [10] = {
6377 "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
6378 };
6379
6380 static CharPtr insd_top [3] = {
6381 NULL, "linear", "circular"
6382 };
6383 */
6384
6385 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
6386
FormatSlashBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)6387 NLM_EXTERN CharPtr FormatSlashBlock (
6388 Asn2gbFormatPtr afp,
6389 BaseBlockPtr bbp
6390 )
6391
6392 {
6393 IntAsn2gbJobPtr ajp;
6394 Asn2gbSectPtr asp;
6395 GBFeaturePtr currf, headf, nextf;
6396 GBReferencePtr currr, headr, nextr;
6397 Uint1 featdeftype;
6398 GBSeqPtr gbseq, gbtmp;
6399 IntAsn2gbSectPtr iasp;
6400 IndxPtr index;
6401 INSDSeq is;
6402 /*
6403 Int2 moltype, strandedness, topology;
6404 */
6405
6406 if (afp == NULL || bbp == NULL) return NULL;
6407 ajp = afp->ajp;
6408 if (ajp == NULL) return NULL;
6409 asp = afp->asp;
6410 if (asp == NULL) return NULL;
6411
6412 iasp = (IntAsn2gbSectPtr) asp;
6413
6414 /* sort and unique indexes */
6415
6416 index = ajp->index;
6417
6418 if (index != NULL) {
6419
6420 MemCopy (index, &asp->index, sizeof (IndxBlock));
6421 MemSet (&asp->index, 0, sizeof (IndxBlock));
6422
6423 index->authors = ValNodeSort (index->authors, SortVnpByString);
6424 index->authors = UniqueValNode (index->authors);
6425
6426 index->genes = ValNodeSort (index->genes, SortVnpByString);
6427 index->genes = UniqueValNode (index->genes);
6428
6429 index->journals = ValNodeSort (index->journals, SortVnpByString);
6430 index->journals = UniqueValNode (index->journals);
6431
6432 index->keywords = ValNodeSort (index->keywords, SortVnpByString);
6433 index->keywords = UniqueValNode (index->keywords);
6434
6435 index->secondaries = ValNodeSort (index->secondaries, SortVnpByString);
6436 index->secondaries = UniqueValNode (index->secondaries);
6437 }
6438
6439 /* adjust XML-ized GenBank format */
6440
6441 gbseq = ajp->gbseq;
6442
6443 if (gbseq != NULL) {
6444
6445 MemCopy (gbseq, &asp->gbseq, sizeof (GBSeq));
6446 MemSet (&asp->gbseq, 0, sizeof (GBSeq));
6447
6448 /* reverse order of references */
6449
6450 headr = NULL;
6451 for (currr = gbseq->references; currr != NULL; currr = nextr) {
6452 nextr = currr->next;
6453 currr->next = headr;
6454 headr = currr;
6455 }
6456 gbseq->references = headr;
6457
6458 /* reverse order of features */
6459
6460 headf = NULL;
6461 for (currf = gbseq->feature_table; currf != NULL; currf = nextf) {
6462 nextf = currf->next;
6463 currf->next = headf;
6464 headf = currf;
6465 }
6466 gbseq->feature_table = headf;
6467 }
6468
6469 /* if generating GBSeq XML/ASN, write at each slash block */
6470
6471 if (gbseq != NULL && afp->aip != NULL) {
6472 if (ajp->produceInsdSeq) {
6473 MemSet ((Pointer) &is, 0, sizeof (INSDSeq));
6474 is.next = (INSDSeqPtr) gbseq->next;
6475 is.OBbits__ = gbseq->OBbits__;
6476 is.locus = gbseq->locus;
6477 is.length = gbseq->length;
6478 is.strandedness = gbseq->strandedness;
6479 is.moltype = gbseq->moltype;
6480 is.topology = gbseq->topology;
6481 /*
6482 strandedness = (Int2) gbseq->strandedness;
6483 if (strandedness < 0 || strandedness > 3) {
6484 strandedness = 0;
6485 }
6486 is.strandedness = StringSave (insd_strd [strandedness]);
6487 moltype = (Int2) gbseq->moltype;
6488 if (moltype < 0 || moltype > 9) {
6489 moltype = 0;
6490 }
6491 is.moltype = StringSave (insd_mol [moltype]);
6492 topology = (Int2) gbseq->topology;
6493 if (topology < 0 || topology > 2) {
6494 topology = 0;
6495 }
6496 is.topology = StringSave (insd_top [topology]);
6497 */
6498 is.division = gbseq->division;
6499 is.update_date = gbseq->update_date;
6500 is.create_date = gbseq->create_date;
6501 is.update_release = gbseq->update_release;
6502 is.create_release = gbseq->create_release;
6503 is.definition = gbseq->definition;
6504 is.primary_accession = gbseq->primary_accession;
6505 is.entry_version = gbseq->entry_version;
6506 is.accession_version = gbseq->accession_version;
6507 is.other_seqids = gbseq->other_seqids;
6508 is.secondary_accessions = gbseq->secondary_accessions;
6509 is.project = gbseq->project;
6510 is.keywords = gbseq->keywords;
6511 is.segment = gbseq->segment;
6512 is.source = gbseq->source;
6513 is.organism = gbseq->organism;
6514 is.taxonomy = gbseq->taxonomy;
6515 is.references = (INSDReferencePtr) gbseq->references;
6516 is.comment = gbseq->comment;
6517 is.comment_set = (INSDCommentPtr) gbseq->comment_set;
6518 is.struc_comments = (INSDStrucCommentPtr) gbseq->struc_comments;
6519 is.primary = gbseq->primary;
6520 is.source_db = gbseq->source_db;
6521 is.database_reference = gbseq->database_reference;
6522 is.feature_table = (INSDFeaturePtr) gbseq->feature_table;
6523 is.feature_set = (INSDFeatureSetPtr) gbseq->feature_set;
6524 is.sequence = gbseq->sequence;
6525 is.contig = gbseq->contig;
6526 is.alt_seq = (INSDAltSeqDataPtr) gbseq->alt_seq;
6527 is.xrefs = (INSDXrefPtr) gbseq->xrefs;
6528 INSDSeqAsnWrite (&is, afp->aip, afp->atp);
6529 } else {
6530 GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
6531 }
6532 if (afp->atp == NULL) {
6533 AsnPrintNewLine (afp->aip);
6534 }
6535 AsnIoFlush (afp->aip);
6536
6537 /* clean up gbseq fields */
6538
6539 gbtmp = GBSeqNew ();
6540 MemCopy (gbtmp, gbseq, sizeof (GBSeq));
6541 MemSet (gbseq, 0, sizeof (GBSeq));
6542 GBSeqFree (gbtmp);
6543 }
6544
6545 /* then clean up javascript components */
6546
6547 iasp->gi = MemFree (iasp->gi);
6548 iasp->acc = MemFree (iasp->acc);
6549 for (featdeftype = 0; featdeftype < FEATDEF_MAX; featdeftype++) {
6550 iasp->feat_key [featdeftype] = MemFree (iasp->feat_key [featdeftype]);
6551 }
6552
6553 /* slash has string pre-allocated by add slash block function */
6554
6555 return StringSaveNoNull (bbp->string);
6556 }
6557
6558
6559