1 /*   asn2gnb5.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2gnb5.c
27 *
28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 *          Mati Shomrat
30 *
31 * Version Creation Date:   10/21/98
32 *
33 * $Revision: 1.286 $
34 *
35 * File Description:  New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41 
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59 
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65 
66 /* URLs */
67 
68 
/* PubMed base URL; NOTE(review): presumably a PubMed/MEDLINE uid is appended - usage is not visible here */
static CharPtr link_muid = "https://www.ncbi.nlm.nih.gov/pubmed/";

/* USPTO full-text patent lookup; the URL ends in "patentnumber=" */
static CharPtr link_uspto = "http://patft.uspto.gov/netacgi/nph-Parser?patentnumber=";

/* Patent Lens (Cambia) patent lookup; the URL ends in "patnum=" */
static CharPtr link_cambia = "http://www.patentlens.net/patentlens/simple.cgi?patnum=";

/* DOI resolver base URL; NOTE(review): presumably the DOI string is appended - confirm at call site */
static CharPtr link_doi = "http://dx.doi.org/";
76 
77 
78 /* www utility functions */
79 
GetWWW(IntAsn2gbJobPtr ajp)80 NLM_EXTERN Boolean GetWWW (IntAsn2gbJobPtr ajp) {
81     return ajp->www;
82 }
83 
FiniWWW(IntAsn2gbJobPtr ajp)84 NLM_EXTERN void FiniWWW (IntAsn2gbJobPtr ajp) {
85     ajp->www = FALSE;
86 }
87 
InitWWW(IntAsn2gbJobPtr ajp)88 NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
89 {
90   ajp->www = TRUE;
91 }
92 
FF_www_featloc(StringItemPtr ffstring,CharPtr loc)93 NLM_EXTERN void FF_www_featloc(StringItemPtr ffstring, CharPtr loc)
94 {
95   CharPtr ptr;
96 
97   if (loc == NULL) return;
98 
99   for ( ptr = loc; *ptr != '\0'; ++ptr ) {
100     switch (*ptr) {
101     case '<' :
102       /*FFAddOneString (ffstring, "<", FALSE, FALSE, TILDE_IGNORE);*/
103       FFAddOneString (ffstring, "&lt;", FALSE, FALSE, TILDE_IGNORE);
104       break;
105     case '>' :
106       /*FFAddOneString (ffstring, ">", FALSE, FALSE, TILDE_IGNORE);*/
107       FFAddOneString (ffstring, "&gt;", FALSE, FALSE, TILDE_IGNORE);
108       break;
109     default:
110       FFAddOneChar(ffstring, *ptr, FALSE);
111       break;
112     }
113   }
114 }
115 
116 
117 /* ************** */
118 
LooksLikeAccession(CharPtr accession,Int2Ptr alphaP,Int2Ptr digitP,Int2Ptr unscrP)119 static Boolean LooksLikeAccession (
120   CharPtr accession,
121   Int2Ptr alphaP,
122   Int2Ptr digitP,
123   Int2Ptr unscrP
124 )
125 
126 {
127   Char     ch;
128   Int2     numAlpha = 0;
129   Int2     numDigits = 0;
130   Int2     numUndersc = 0;
131   CharPtr  str;
132 
133   if (accession == NULL || accession [0] == '\0') return FALSE;
134 
135   if (StringLen (accession) >= 16) return FALSE;
136 
137   if (accession [0] < 'A' || accession [0] > 'Z') return FALSE;
138 
139   str = accession;
140   if (StringNCmp (str, "NZ_", 3) == 0) {
141     str += 3;
142   }
143   ch = *str;
144   while (IS_ALPHA (ch)) {
145     numAlpha++;
146     str++;
147     ch = *str;
148   }
149   while (ch == '_') {
150     numUndersc++;
151     str++;
152     ch = *str;
153   }
154   while (IS_DIGIT (ch)) {
155     numDigits++;
156     str++;
157     ch = *str;
158   }
159   if (ch != '\0' && ch != ' ' && ch != '.') return FALSE;
160 
161   if (numUndersc > 1) return FALSE;
162 
163   if (alphaP != NULL) {
164     *alphaP = numAlpha;
165   }
166   if (digitP != NULL) {
167     *digitP = numDigits;
168   }
169   if (unscrP != NULL) {
170     *unscrP = numUndersc;
171   }
172 
173   if (numUndersc == 0) {
174     if (numAlpha == 1 && numDigits == 5) return TRUE;
175     if (numAlpha == 2 && numDigits == 6) return TRUE;
176     if (numAlpha == 3 && numDigits == 5) return TRUE;
177     if (numAlpha == 4 && numDigits == 8) return TRUE;
178     if (numAlpha == 4 && numDigits == 9) return TRUE;
179     if (numAlpha == 5 && numDigits == 7) return TRUE;
180   } else if (numUndersc == 1) {
181     if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return FALSE;
182     if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
183       if (accession [1] == 'M' ||
184           accession [1] == 'C' ||
185           accession [1] == 'T' ||
186           accession [1] == 'P' ||
187           accession [1] == 'G' ||
188           accession [1] == 'R' ||
189           accession [1] == 'S' ||
190           accession [1] == 'W' ||
191           accession [1] == 'Z') {
192         return TRUE;
193       }
194     }
195     if (accession [0] == 'A' || accession [0] == 'Y') {
196       if (accession [1] == 'P') return TRUE;
197     }
198   }
199 
200   return FALSE;
201 }
202 
/* one entry of the db_xref link table: database name and the URL used to link to it */
typedef struct dbxrefurldata {
  CharPtr  tag;  /* db_xref database name, matched by DbNameIsValid */
  CharPtr  url;  /* URL to which FF_www_get_url appends prefix, identifier and suffix */
} UrlData, PNTR UrlDataPtr;
207 
/*
 * Table of db_xref database names and their link URLs.
 *
 * The entries MUST stay sorted by tag in case-insensitive order, because
 * DbNameIsValid locates a tag with a binary search that compares using
 * StringICmp.  The final match there is case sensitive, so the tag must
 * also be spelled exactly as the db_xref database name.
 *
 * Several databases get extra treatment in FF_www_get_url (alternate URL,
 * prefix or suffix around the identifier); the URL here is the default.
 */
static UrlData Nlm_url_base [] = {
  {"AceView/WormGenes",     "https://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q="},
  {"AFTOL",                 "http://wasabi.lutzonilab.net/pub/displayTaxonInfo?aftol_id="},
  {"AntWeb",                "http://www.antweb.org/specimen.do?name="},
  {"APHIDBASE",             "http://bipaa.genouest.org/apps/grs-2.3/grs?reportID=aphidbase_transcript_report&objectID="},
  {"ApiDB",                 "http://www.apidb.org/apidb/showRecord.do?name=GeneRecordClasses.ApiDBGeneRecordClass&primary_key="},
  {"ApiDB_CryptoDB",        "http://cryptodb.org/cryptodb/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=CryptoDB&source_id="},
  {"ApiDB_PlasmoDB",        "http://plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=PlasmoDB&source_id="},
  {"ApiDB_ToxoDB",          "http://toxodb.org/toxo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=ToxoDB&source_id="},
  {"Araport",               "https://www.araport.org/locus/"},
  {"ASAP",                  "https://asap.genetics.wisc.edu/asap/feature_info.php?FeatureID="},
  {"ATCC",                  "http://www.atcc.org/Products/All/"},
  {"Axeldb",                "http://www.dkfz-heidelberg.de/tbi/services/axeldb/clone/xenopus?name="},
  {"BEEBASE",               "http://hymenopteragenome.org/cgi-bin/gb2/gbrowse/bee_genome45/?name="},
  {"BEETLEBASE",            "http://www.beetlebase.org/cgi-bin/report.cgi?name="},
  {"BEI",                   "https://www.beiresources.org/Catalog/animalViruses/"},
  {"BGD",                   "http://bovinegenome.org/genepages/btau40/genes/"},
  {"BioProject",            "https://www.ncbi.nlm.nih.gov/bioproject/"},
  {"BioSample",             "https://www.ncbi.nlm.nih.gov/biosample/"},
  {"BOLD",                  "http://www.boldsystems.org/connectivity/specimenlookup.php?processid="},
  {"CCDS",                  "https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA="},
  {"CDD",                   "https://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="},
  {"CGD",                   "http://www.candidagenome.org/cgi-bin/locus.pl?locus="},
  {"CGNC",                  "http://birdgenenames.org/cgnc/GeneReport?id="},
  {"CK",                    "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid="},
  {"COG",                   "https://www.ncbi.nlm.nih.gov/COG/new/release/cow.cgi?cog="},
  {"CollecTF",              "http://collectf.umbc.edu/"},
  {"dbClone",               "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clone&cmd=Retrieve&list_uids="},
  {"dbCloneLib",            "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clonelib&cmd=Retrieve&list_uids="},
  {"dbEST",                 "https://www.ncbi.nlm.nih.gov/nucest/"},
  {"dbProbe",               "https://www.ncbi.nlm.nih.gov/sites/entrez?db=probe&cmd=Retrieve&list_uids="},
  {"dbSNP",                 "https://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs="},
  {"dbSTS",                 "https://www.ncbi.nlm.nih.gov/nuccore/"},
  {"dictyBase",             "http://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid="},
  {"DSM",                   "https://www.dsmz.de/catalogues/details/culture/DSM-"},
  {"DSMZ",                  "https://www.dsmz.de/catalogues/details/culture/PV-"},
  {"ECOCYC",                "http://biocyc.org/ECOLI/new-image?type=GENE&object="},
  {"EcoGene",               "http://www.ecogene.org/gene/"},
  {"ENSEMBL",               "http://www.ensembl.org/id/"},
  {"EnsemblGenomes",        "http://ensemblgenomes.org/id/"},
  {"EnsemblGenomes-Gn",     "http://ensemblgenomes.org/id/"},
  {"EnsemblGenomes-Tr",     "http://ensemblgenomes.org/id/"},
  {"FANTOM_DB",             "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="},
  {"FBOL",                  "http://www.fungalbarcoding.org/BioloMICS.aspx?Table=Fungal%20barcodes&Fields=All&Rec="},
  {"FLYBASE",               "http://flybase.bio.indiana.edu/.bin/fbidq.html?"},
  {"Fungorum",              "http://www.indexfungorum.org/Names/NamesRecord.asp?RecordID="},
  {"GABI",                  "http://www.gabipd.org/database/cgi-bin/GreenCards.pl.cgi?Mode=ShowSequence&App=ncbi&SequenceId="},
  {"GenBank",               "https://www.ncbi.nlm.nih.gov/nuccore/"},
  {"GeneDB",                "http://old.genedb.org/genedb/Search?organism=All%3A*&name="},
  {"GeneID",                "https://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=Retrieve&dopt=full_report&list_uids="},
  {"GO",                    "http://amigo.geneontology.org/amigo/term/GO:"},
  {"GOA",                   "http://www.ebi.ac.uk/ego/GProtein?ac="},
  {"Greengenes",            "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id="},
  {"GRIN",                  "http://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?"},
  {"H-InvDB",               "http://www.h-invitational.jp"},
  {"HGNC",                  "http://www.genenames.org/cgi-bin/gene_symbol_report?hgnc_id=HGNC:"},
  {"HMP",                   "http://www.hmpdacc.org/catalog/grid.php?dataset=genomic&hmp_id="},
  {"HOMD",                  "http://www.homd.org/"},
  {"HPM",                   "http://www.humanproteomemap.org/protein.php?hpm_id="},
  {"HPRD",                  "http://www.hprd.org/protein/"},
  {"HSSP",                  "http://mrs.cmbi.ru.nl/m6/search?db=all&q="},
  {"I5KNAL",                "https://i5k.nal.usda.gov/"},
  {"IKMC",                  "http://www.knockoutmouse.org/martsearch/project/"},
  {"IMGT/GENE-DB",          "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+"},
  {"IMGT/HLA",              "http://www.ebi.ac.uk/cgi-bin/ipd/imgt/hla/get_allele.cgi?"},
  {"IMGT/LIGM",             "http://www.imgt.org/cgi-bin/IMGTlect.jv?query=201+"},
  {"InterimID",             "https://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
  {"InterPro",              "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query="},
  {"IntrepidBio",           "http://server1.intrepidbio.com/FeatureBrowser/gene/browse/"},
  {"IRD",                   "http://www.fludb.org/brc/fluSegmentDetails.do?irdSubmissionId="},
  {"ISD",                   "http://www.flu.lanl.gov/search/view_record.html?accession="},
  {"ISFinder",              "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name="},
  {"ISHAM-ITS",             "http://its.mycologylab.org/BioloMICS.aspx?Table=Sequences&ExactMatch=T&Name=MITS"},
  {"JCM",                   "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="},
  {"JGIDB",                 "http://genome.jgi-psf.org/cgi-bin/jgrs?id="},
  {"LocusID",               "https://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
  {"MaizeGDB",              "http://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?"},
  {"MedGen",                "https://www.ncbi.nlm.nih.gov/medgen/"},
  {"MGI",                   "http://www.informatics.jax.org/marker/MGI:"},
  {"MIM",                   "https://www.ncbi.nlm.nih.gov/omim/"},
  {"miRBase",               "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc="},
  {"MycoBank",              "http://www.mycobank.org/MycoTaxo.aspx?Link=T&Rec="},
  {"NASONIABASE",           "http://hymenopteragenome.org/cgi-bin/gbrowse/nasonia10_scaffold/?name="},
  {"NBRC",                  "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="},
  {"NextDB",                "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk="},
  {"niaEST",                "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1="},
  {"NMPDR",                 "http://www.nmpdr.org/linkin.cgi?id="},
  {"NRESTdb",               "http://genome.ukm.my/nrestdb/db/single_view_est.php?id="},
  {"OrthoMCL",              "http://orthomcl.org/orthomcl/showRecord.do?name=GroupRecordClasses.GroupRecordClass&group_name="},
  {"Osa1",                  "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name="},
  {"PBmice",                "http://www.idmshanghai.cn/PBmice/DetailedSearch.do?type=insert&id="},
  {"PBR",                   "http://www.poxvirus.org/query.asp?web_id="},
  {"PDB",                   "http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId="},
  {"PFAM",                  "http://pfam.sanger.ac.uk/family?acc="},
  {"PGN",                   "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier="},
  {"Phytozome",             "http://www.phytozome.net/genePage.php?db=Phytozome&crown&method=0&search=1&detail=1&searchText=locusname:"},
  {"PomBase",               "http://www.pombase.org/spombe/result/"},
  {"PseudoCap",             "http://www.pseudomonas.com/getAnnotation.do?locusID="},
  {"RAP-DB",                "http://rapdb.dna.affrc.go.jp/cgi-bin/gbrowse_details/latest?name="},
  {"RATMAP",                "http://ratmap.gen.gu.se/ShowSingleLocus.htm?accno="},
  {"RBGE_garden",           "http://data.rbge.org.uk/living/"},
  {"RBGE_herbarium",        "http://data.rbge.org.uk/herb/"},
  {"REBASE",                "http://rebase.neb.com/rebase/enz/"},
  {"RefSeq",                "https://www.ncbi.nlm.nih.gov/nuccore/"},
  {"RFAM",                  "http://rfam.xfam.org/family/"},
  {"RGD",                   "http://rgd.mcw.edu/rgdweb/search/search.html?term="},
  {"RiceGenes",             "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="},
  {"SEED",                  "http://www.theseed.org/linkin.cgi?id="},
  {"SGD",                   "http://www.yeastgenome.org/cgi-bin/locus.fpl?sgdid="},
  {"SGN",                   "http://www.sgn.cornell.edu/search/est.pl?request_type=7&request_id="},
  {"SK-FST",                "http://aafc-aac.usask.ca/fst/"},
  {"SRPDB",                 "http://rnp.uthscsa.edu/rnp/SRPDB/rna/sequences/fasta/"},
  {"SubtiList",             "http://genolist.pasteur.fr/SubtiList/genome.cgi?external_query+"},
  {"TAIR",                  "http://www.arabidopsis.org/servlets/TairObject?type=locus&name="},
  {"taxon",                 "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?"},
  {"TIGRFAM",               "http://www.jcvi.org/cgi-bin/tigrfams/HmmReportPage.cgi?acc="},
  {"TubercuList",           "http://tuberculist.epfl.ch/quicksearch.php?gene+name="},
  {"UniGene",               "https://www.ncbi.nlm.nih.gov/sites/entrez?Db=unigene&Cmd=Search&Term="},
  {"UniProtKB/Swiss-Prot",  "http://www.uniprot.org/uniprot/"},
  {"UniProtKB/TrEMBL",      "http://www.uniprot.org/uniprot/"},
  {"UniSTS",                "https://www.ncbi.nlm.nih.gov/probe?term="},
  {"UNITE",                 "http://unite.ut.ee/bl_forw.php?nimi="},
  {"VBASE2",                "http://www.dnaplot.de/vbase2/vgene.php?id="},
  {"VBRC",                  "http://vbrc.org/query.asp?web_view=curation&web_id="},
  {"VectorBase",            "http://www.vectorbase.org/Genome/BRCGene/?feature="},
  {"Vega",                  "http://vega.sanger.ac.uk/id/"},
  {"VGNC",                  "http://vertebrate.genenames.org/data/gene-symbol-report/#!/vgnc_id/VGNC:"},
  {"ViPR",                  "http://www.viprbrc.org/brc/viprStrainDetails.do?viprSubmissionId="},
  {"VISTA",                 "http://enhancer.lbl.gov/cgi-bin/dbxref.pl?id="},
  {"WorfDB",                "http://worfdb.dfci.harvard.edu/search.pl?form=1&search="},
  {"WormBase",              "http://www.wormbase.org/search/gene/"},
  {"Xenbase",               "http://www.xenbase.org/gene/showgene.do?method=display&geneId="},
  {"ZFIN",                  "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID="},
};
342 
DbNameIsValid(CharPtr db)343 static Int2 DbNameIsValid (
344   CharPtr db
345 )
346 
347 {
348   Int2  L, R, mid;
349 
350   if (StringHasNoText (db)) return -1;
351 
352   L = 0;
353   R = sizeof (Nlm_url_base) / sizeof (Nlm_url_base [0]);
354 
355   while (L < R) {
356     mid = (L + R) / 2;
357     if (StringICmp (Nlm_url_base [mid].tag, db) < 0) {
358       L = mid + 1;
359     } else {
360       R = mid;
361     }
362   }
363 
364   /* case sensitive comparison at end enforces strictness */
365 
366   if (StringCmp (Nlm_url_base [R].tag, db) == 0) {
367     return R;
368   }
369 
370   return -1;
371 }
372 
/*
 * NCBI site prefixes (http and https).  FF_www_get_url and
 * FF_Add_NCBI_Base_URL strip whichever one a URL starts with and
 * substitute the configured NCBI_URL_BASE when that setting is present.
 */
static const Char* kNCBIUrl = "http://www.ncbi.nlm.nih.gov/";
static const Char* kNCBISUrl = "https://www.ncbi.nlm.nih.gov/";
375 
/*
 * FF_www_get_url
 *
 * Appends an HTML anchor for a db_xref to ffstring:
 *
 *   <a href="[base]url[prefix][ident][suffix]">[redundant]identifier</a>
 *
 * ffstring    destination string item; nothing is done if NULL
 * db          database name; must match a tag in Nlm_url_base exactly
 * identifier  identifier within that database; leading spaces are skipped
 * bsp         bioseq used only for the IMGT/GENE-DB organism test, may be NULL
 *
 * If db is not in the table, or the identifier cannot be turned into a
 * link, the identifier is appended as plain text with no anchor.
 *
 * Per-database special cases may replace the url, strip part of the
 * identifier (ident), or add a prefix/suffix around it.  "redundant" is
 * text shown in front of the identifier in the visible link text.
 *
 * NOTE(review): identifier is written into the href and the link text
 * without HTML escaping; if records can carry untrusted db_xref values
 * this should be escaped.
 */
static void FF_www_get_url (
  StringItemPtr ffstring,
  CharPtr db,
  CharPtr identifier,
  BioseqPtr bsp
)

{
  CharPtr  base = NULL, prefix = NULL, ident = NULL,
           suffix = NULL, url = NULL, redundant = NULL, ptr, str;
  Char     ch, buf [128], id [20], taxname [128], zeroes [16];
  Boolean  is_numeric;
  Int2     num_alpha, num_digit, num_unscr, R;
  size_t   idlen, skip;

  if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;

  while (*identifier == ' ') {
    identifier++;
  }
  ident = identifier;

  R = DbNameIsValid (db);
  if (R < 0) {
    FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  url = Nlm_url_base [R].url;

  /* special cases */

  if (StringCmp (db, "IRD") == 0) {

    suffix = "&decorator=influenza";

  } else if (StringCmp (db, "ATCC") == 0) {

    suffix = ".aspx";

  } else if (StringCmp (db, "BEI") == 0) {

    suffix = ".aspx";

  } else if (StringCmp (db, "ViPR") == 0) {

    suffix = "&decorator=vipr";

  } else if (StringCmp (db, "SRPDB") == 0) {

    suffix = ".fasta";

  } else if (StringCmp (db, "FLYBASE") == 0) {

    if (StringStr (identifier, "FBa") != NULL ) {
      url = "http://www.fruitfly.org/cgi-bin/annot/fban?";
    }

  } else if (StringCmp (db, "Fungorum") == 0) {

    /* link uses the identifier starting at its first digit */

    str = identifier;
    ch = *str;
    while (ch != '\0' && ! IS_DIGIT (ch)) {
      str++;
      ch = *str;
    }
    ident = str;

  } else if (StringCmp (db, "dictyBase") == 0) {

    if (StringChr (identifier, '_') != NULL) {
      url = "http://dictybase.org/db/cgi-bin/gene_page.pl?primary_id=";
    }

  } else if (StringCmp (db, "GDB") == 0) {

    /* NOTE(review): "GDB" has no entry in Nlm_url_base, so this branch is currently not reached */

    str = StringStr (identifier, "G00-");
    if (str != NULL) {

      /* copy what follows "G00-" with the dashes removed */

      ptr = id;
      str += 4;
      ch = *str;
      while (ch != '\0') {
        if (ch != '-') {
          if (ptr >= id + sizeof (id) - 1) {
            /* too long for the local buffer - emit plain text rather than overflow */
            FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
            return;
          }
          *ptr = ch;
          ptr++;
        }
        str++;
        ch = *str;
      }
      *ptr = '\0';
      ident = id;
    } else {
      ch = *identifier;
      if (! IS_DIGIT (ch)) {
        FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
        return;
      }
    }

  } else if (StringCmp (db, "H_InvDB") == 0) {

    /*
     * NOTE(review): the table tag is spelled "H-InvDB" (hyphen), so this
     * underscore spelling never passes DbNameIsValid and the branch is
     * currently not reached - confirm which spelling is intended.
     */

    if (StringStr (identifier, "HIT") != NULL) {
      url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR=";
    } else if (StringStr (identifier, "HIX") != NULL) {
      url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR=";
    }

  } else if (StringCmp (db, "HOMD") == 0) {

    /* the 4-character prefix is stripped, so it must be at the start of the identifier */

    if (StringNCmp (identifier, "tax_", 4) == 0) {
      url = "http://www.homd.org/taxon=";
      ident += 4;
    } else if (StringNCmp (identifier, "seq_", 4) == 0) {
      url = "http://www.homd.org/seq=";
      ident += 4;
    }

  } else if (StringCmp (db, "IMGT/GENE-DB") == 0) {

    if (bsp != NULL && BioseqToGeneticCode (bsp, NULL, NULL, NULL, taxname, sizeof (taxname), NULL)) {
      if (StringCmp (taxname, "Homo sapiens") == 0) {
        url = "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+";
      }
      if (StringCmp (taxname, "Mus musculus") == 0) {
        url = "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Mus+musculus&query=2+";
      }
    }

  } else if (StringCmp (db, "IMGT/HLA") == 0) {

    if (StringNICmp (identifier, "HLA", 3) != 0 ) {
      url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=imgthla;id=";
    }

  } else if (StringCmp (db, "MaizeGDB") == 0) {

    is_numeric = TRUE;
    str = identifier;
    ch = *str;
    while (ch != '\0') {
      if (! IS_DIGIT (ch)) {
        is_numeric = FALSE;
      }
      str++;
      ch = *str;
    }

    if (is_numeric) {
      prefix = "id=";
    } else {
      prefix = "term=";
    }

  } else if (StringCmp (db, "miRBase") == 0) {

    if (StringStr (identifier, "MIMAT") != NULL) {
      url = "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=";
    }

  } else if (StringCmp (db, "RefSeq") == 0) {

    /* identifiers of the form ?P_ link to the protein database */

    ch = identifier [0];
    if (IS_ALPHA (ch) && identifier [1] == 'P' && identifier [2] == '_') {
      url = "https://www.ncbi.nlm.nih.gov/protein/";
    }

  } else if (StringCmp (db, "WormBase") == 0) {

    if (LooksLikeAccession (identifier, &num_alpha, &num_digit, &num_unscr) && num_alpha == 3 && num_digit == 5) {
      url = "http://www.wormbase.org/search/protein/";
    }

  } else if (StringCmp (db, "niaEST") == 0) {

    suffix = "&val=1";

  } else if (StringCmp (db, "RAP-DB") == 0) {

    suffix = ";class=locus_id";

  } else if (StringCmp (db, "REBASE") == 0) {

    suffix = ".html";

  } else if (StringCmp (db, "SK-FST") == 0) {

    /* link goes to the site itself, identifier is not part of the URL */

    ident = NULL;

  } else if (StringCmp (db, "taxon") == 0) {

    ch = *identifier;
    if (IS_DIGIT (ch)) {
      prefix = "id=";
    } else {
      prefix = "name=";
    }

  } else if (StringCmp (db, "UniSTS") == 0) {

    suffix = "%20%5BUniSTS%20ID%5D";

  } else if (StringCmp (db, "HGNC") == 0) {

    /* table URL already ends in "HGNC:", so drop it from the identifier */

    if (StringNCmp (identifier, "HGNC:", 5) == 0 ) {
      ident += 5;
    }
    redundant = "HGNC:";

  } else if (StringCmp (db, "VGNC") == 0) {

    if (StringNCmp (identifier, "VGNC:", 5) == 0 ) {
      ident += 5;
    }
    redundant = "VGNC:";

  } else if (StringCmp (db, "MGI") == 0) {

    if (StringNCmp (identifier, "MGI:", 4) == 0 ) {
      ident += 4;
    }
    redundant = "MGI:";

  } else if (StringCmp (db, "RGD") == 0) {

    if (StringNCmp (identifier, "RGD:", 4) == 0 ) {
      ident += 4;
    }

  } else if (StringCmp (db, "ISHAM-ITS") == 0) {

    /* table URL already ends in "MITS" */

    if (StringNCmp (identifier, "MITS", 4) == 0 ) {
      ident += 4;
    }

  } else if (StringCmp (db, "NBRC") == 0) {

    /* left-pad the identifier with zeroes to eight characters */

    idlen = StringLen (identifier);
    if (idlen < 8) {
      StringCpy (zeroes, "00000000");
      prefix = zeroes + idlen;
    }

  }

  /*
   * NCBI URL can be overridden by configuration file.  This is done on
   * the final url, after the special cases, so that a special case
   * which substitutes another NCBI URL (RefSeq proteins) is rebased as
   * well instead of being emitted as base + full URL.
   */

  if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
    if (StringDoesHaveText (buf)) {
      skip = 0;
      if (StringNICmp (url, kNCBIUrl, strlen(kNCBIUrl)) == 0) {
        skip = strlen(kNCBIUrl);
      } else if (StringNICmp (url, kNCBISUrl, strlen(kNCBISUrl)) == 0) {
        skip = strlen(kNCBISUrl);
      }
      if (skip > 0) {
        url += skip;
        base = buf;
      }
    }
  }

  /* now generate URL */

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (base)) {
    FFAddOneString (ffstring, base, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, url, FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (prefix)) {
    FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
  }
  if (StringDoesHaveText (ident)) {
    FFAddOneString (ffstring, ident, FALSE, FALSE, TILDE_IGNORE);
  }
  if (StringDoesHaveText (suffix)) {
    FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring,  "\">", FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (redundant)) {
    FFAddOneString (ffstring, redundant, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
683 
FF_www_db_xref(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr db,CharPtr identifier,BioseqPtr bsp)684 NLM_EXTERN void FF_www_db_xref (
685   IntAsn2gbJobPtr ajp,
686   StringItemPtr ffstring,
687   CharPtr db,
688   CharPtr identifier,
689   BioseqPtr bsp
690 )
691 {
692   CharPtr  colon = ":";
693 
694   if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;
695 
696   if (StringCmp (db, "HGNC") == 0 && StringNCmp (identifier, "HGNC:", 5) == 0) {
697     identifier += 5;
698   } else if (StringCmp (db, "VGNC") == 0 && StringNCmp (identifier, "VGNC:", 5) == 0) {
699     identifier += 5;
700   } else if (StringCmp (db, "MGI") == 0 && StringNCmp (identifier, "MGI:", 4) == 0) {
701     identifier += 4;
702   }
703 
704   if (GetWWW (ajp)) {
705     FFAddTextToString (ffstring, db, colon, NULL, FALSE, FALSE, TILDE_IGNORE);
706     FF_www_get_url (ffstring, db, identifier, bsp);
707   } else {
708     if (StringCmp (db, "HGNC") == 0) {
709       colon = ":HGNC:";
710     } else if (StringCmp (db, "VGNC") == 0) {
711       colon = ":VGNC:";
712     } else if (StringCmp (db, "MGI") == 0) {
713       colon = ":MGI:";
714     }
715     FFAddTextToString (ffstring, db, colon, identifier, FALSE, FALSE, TILDE_IGNORE);
716   }
717 }
718 
FF_Add_NCBI_Base_URL(StringItemPtr ffstring,CharPtr url)719 NLM_EXTERN void FF_Add_NCBI_Base_URL (
720   StringItemPtr ffstring,
721   CharPtr url
722 )
723 
724 {
725   CharPtr  base = NULL;
726   Char     buf [128];
727 
728   if (ffstring == NULL || StringHasNoText (url)) return;
729 
730   /* NCBI URL can be overridden by configuration file */
731 
732   if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
733     if (StringDoesHaveText (buf)) {
734       Uint1 len = 0;
735       if (StringNICmp (url, kNCBIUrl, strlen(kNCBIUrl)) == 0)
736         len = strlen(kNCBIUrl);
737       else if (StringNICmp (url, kNCBISUrl, strlen(kNCBISUrl)) == 0)
738         len = strlen(kNCBISUrl);
739       if (len > 0) {
740         url += len;
741         base = buf;
742       }
743     }
744   }
745 
746   if (StringDoesHaveText (base)) {
747     FFAddOneString (ffstring, base, FALSE, FALSE, TILDE_IGNORE);
748   }
749   FFAddOneString (ffstring, url, FALSE, FALSE, TILDE_IGNORE);
750 }
751 
752 
753 /* ************** */
754 
755 
756 /* public function to get URLs for collaboration-approved db_xrefs */
757 
758 static Boolean links_loaded = FALSE;
759 
asn2gnbk_dbxref(DbtagPtr dbt)760 NLM_EXTERN CharPtr asn2gnbk_dbxref (
761   DbtagPtr dbt
762 )
763 
764 {
765   IntAsn2gbJobPtr  ajp;
766   Char             buf [128];
767   StringItemPtr    ffstring;
768   ObjectIdPtr      oip;
769   CharPtr          ptr;
770   CharPtr          str;
771   CharPtr          tmp;
772 
773   if (dbt == NULL) return NULL;
774   if (StringHasNoText (dbt->db)) return NULL;
775   oip = dbt->tag;
776   if (oip == NULL) return NULL;
777 
778   if (! StringHasNoText (oip->str)) {
779     if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
780       sprintf (buf, "%s", oip->str);
781     }
782   } else {
783     sprintf (buf, "%ld", (long) oip->id);
784   }
785 
786   ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
787   if (ajp == NULL) return NULL;
788   ffstring = FFGetString (ajp);
789   if ( ffstring == NULL ) return NULL;
790 
791   if (! links_loaded) {
792     InitWWW (ajp);
793     links_loaded = TRUE;
794   }
795   ajp->www = TRUE;
796 
797   FF_www_db_xref (ajp, ffstring, dbt->db, buf, NULL);
798 
799   ajp->www = FALSE;
800 
801   str = FFToCharPtr (ffstring);
802 
803   FFRecycleString (ajp, ffstring);
804   /*
805   MemFree (ajp);
806   */
807   asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
808 
809   tmp = StringChr (str, '<');
810   if (tmp != NULL) {
811     ptr = StringSave (tmp);
812     tmp = StringChr (ptr, '>');
813     if (tmp != NULL) {
814       tmp++;
815       *tmp = '\0';
816     }
817     MemFree (str);
818     str = ptr;
819   } else {
820     str = MemFree (str);
821   }
822 
823   return str;
824 }
825 
826 /* format references section */
827 
GetAuthListPtr(PubdescPtr pdp,CitSubPtr csp)828 NLM_EXTERN AuthListPtr GetAuthListPtr (
829   PubdescPtr pdp,
830   CitSubPtr csp
831 )
832 
833 {
834   AuthListPtr  alp = NULL;
835   CitArtPtr    cap;
836   CitBookPtr   cbp;
837   CitGenPtr    cgp;
838   CitPatPtr    cpp;
839   ValNodePtr   vnp;
840 
841   if (csp != NULL) {
842     alp = csp->authors;
843     if (alp != NULL) return alp;
844   }
845   if (pdp == NULL) return NULL;
846 
847   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
848     switch (vnp->choice) {
849       case PUB_Gen :
850         cgp = (CitGenPtr) vnp->data.ptrvalue;
851         if (cgp != NULL) {
852           alp = cgp->authors;
853         }
854         break;
855       case PUB_Sub :
856         csp = (CitSubPtr) vnp->data.ptrvalue;
857         if (csp != NULL) {
858           alp = csp->authors;
859         }
860         break;
861       case PUB_Article :
862         cap = (CitArtPtr) vnp->data.ptrvalue;
863         if (cap != NULL) {
864           alp = cap->authors;
865         }
866         break;
867       case PUB_Book :
868       case PUB_Proc :
869       case PUB_Man :
870         cbp = (CitBookPtr) vnp->data.ptrvalue;
871         if (cbp != NULL) {
872           alp = cbp->authors;
873         }
874         break;
875       case PUB_Patent :
876         cpp = (CitPatPtr) vnp->data.ptrvalue;
877         if (cpp != NULL) {
878           alp = cpp->authors;
879         }
880         break;
881       default :
882         break;
883     }
884 
885     if (alp != NULL) return alp;
886   }
887 
888   return NULL;
889 }
890 
/*
MakeSingleAuthorString builds the text for one author as

    [prefix] name [,initials] [ suffix]

and returns it in newly allocated memory (NULL if name is NULL or the
allocation fails).  prefix, initials and suffix are each skipped when they
have no text.

The text after the prefix is also copied to index->authors (at the head of
that list) when index is not NULL, and appended to gbref->authors when
gbref is not NULL.  Both copies are made before the EMBL adjustment below,
so they keep their commas.

For EMBL_FMT and EMBLPEPT_FMT every comma after the prefix is replaced by a
space in the returned string.
*/

static CharPtr MakeSingleAuthorString (
  FmtType format,
  CharPtr prefix,
  CharPtr name,
  CharPtr initials,
  CharPtr suffix,
  IndxPtr index,
  GBReferencePtr gbref
)

{
  CharPtr  body;
  CharPtr  cursor;
  Char     etal [10];
  CharPtr  result;
  size_t   total;

  if (name == NULL) return NULL;

  /* !!! clean up 'et al' as (presumably) last author !!! */

  /*
  !!! temporary to suppress diff !!!
  A short "et al" / "et,al" name that would be joined with " and " is
  instead emitted with a single leading space and no prefix.  The cleanup
  planned in the earlier version of this code (name becomes "et al." and
  the " and " prefix becomes ", ") is not applied yet.
  */

  if (StringLen (name) <= 6 &&
      (StringNICmp (name, "et al", 5) == 0 || StringNICmp (name, "et,al", 5) == 0) &&
      StringCmp (prefix, " and ") == 0) {
    prefix = NULL;
    etal [0] = ' ';
    StringNCpy_0 (etal + 1, name, sizeof (etal) - 1);
    name = etal;
  }

  /* room for every piece plus the "," and " " joiners and the terminator */

  total = StringLen (prefix) + StringLen (name) + StringLen (initials) + StringLen (suffix);
  result = MemNew (sizeof (Char) * (total + 4));
  if (result == NULL) return NULL;

  /* body marks where the author text starts, just past any prefix */

  body = result;
  if (! StringHasNoText (prefix)) {
    body = StringMove (body, prefix);
  }

  /* initials and suffix to support structured name fields */

  cursor = StringMove (body, name);
  if (! StringHasNoText (initials)) {
    cursor = StringMove (cursor, ",");
    cursor = StringMove (cursor, initials);
  }
  if (! StringHasNoText (suffix)) {
    cursor = StringMove (cursor, " ");
    cursor = StringMove (cursor, suffix);
  }

  /* optionally populate indexes for NCBI internal database */

  if (index != NULL) {
    ValNodeCopyStrToHead (&(index->authors), 0, body);
  }

  /* optionally populate gbseq for XML-ized GenBank format */

  if (gbref != NULL) {
    ValNodeCopyStr (&(gbref->authors), 0, body);
  }

  /* if embl, remove commas in individual names, starting after prefix */

  if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
    for (cursor = body; *cursor != '\0'; cursor++) {
      if (*cursor == ',') {
        *cursor = ' ';
      }
    }
  }

  return result;
}
986 
GetAuthorsString(FmtType format,AuthListPtr alp,CharPtr PNTR consortP,IndxPtr index,GBReferencePtr gbref)987 NLM_EXTERN CharPtr GetAuthorsString (
988   FmtType format,
989   AuthListPtr alp,
990   CharPtr PNTR consortP,
991   IndxPtr index,
992   GBReferencePtr gbref
993 )
994 
995 {
996   AuthorPtr    ap;
997   ValNodePtr   clist;
998   ValNodePtr   conslist;
999   Int2         count;
1000   ValNodePtr   head = NULL;
1001   ValNodePtr   names;
1002   ValNodePtr   next;
1003   NameStdPtr   nsp;
1004   PersonIdPtr  pid;
1005   ValNodePtr   pidlist;
1006   CharPtr      prefix = NULL;
1007   CharPtr      str;
1008   ValNodePtr   vnp;
1009 
1010   if (alp == NULL) return NULL;
1011 
1012   /*
1013   alp = AsnIoMemCopy ((Pointer) alp,
1014                       (AsnReadFunc) AuthListAsnRead,
1015                       (AsnWriteFunc) AuthListAsnWrite);
1016   if (alp == NULL) return NULL;
1017   */
1018 
1019   count = 0;
1020   if (alp->choice == 1) {
1021 
1022     pidlist = NULL;
1023     conslist = NULL;
1024 
1025     for (names = alp->names; names != NULL; names = names->next) {
1026       ap = (AuthorPtr) names->data.ptrvalue;
1027       if (ap == NULL) continue;
1028       pid = ap->name;
1029       if (pid == NULL) continue;
1030       if (pid->choice == 2 || pid->choice == 3 || pid->choice == 4) {
1031         ValNodeAddPointer (&pidlist, 0, (Pointer) pid);
1032       } else if (pid->choice == 5) {
1033         ValNodeAddPointer (&conslist, 0, (Pointer) pid);
1034       }
1035     }
1036 
1037     for (vnp = pidlist; vnp != NULL; vnp = vnp->next) {
1038       next = vnp->next;
1039       if (next == NULL) {
1040         if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1041           if (count == 0) {
1042             prefix = NULL;
1043           } else {
1044             prefix = " and ";
1045           }
1046         }
1047       }
1048       str = NULL;
1049       pid = (PersonIdPtr) vnp->data.ptrvalue;
1050       if (pid->choice == 2) {
1051         nsp = (NameStdPtr) pid->data;
1052         if (nsp != NULL) {
1053           if (! StringHasNoText (nsp->names [0])) {
1054             str = MakeSingleAuthorString (format, prefix, nsp->names [0], nsp->names [4], nsp->names [5], index, gbref);
1055           } else if (! StringHasNoText (nsp->names [3])) {
1056             str = MakeSingleAuthorString (format, prefix, nsp->names [3], NULL, NULL, index, gbref);
1057           }
1058         }
1059       } else if (pid->choice == 3 || pid->choice == 4) {
1060         str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, gbref);
1061       }
1062       if (str != NULL) {
1063         ValNodeAddStr (&head, 0, str);
1064         count++;
1065       }
1066       prefix = ", ";
1067     }
1068 
1069     prefix = NULL;
1070     clist = NULL;
1071     for (vnp = conslist; vnp != NULL; vnp = vnp->next) {
1072       str = NULL;
1073       pid = (PersonIdPtr) vnp->data.ptrvalue;
1074       if (pid->choice == 5) {
1075         str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, NULL);
1076         if (str != NULL) {
1077           ValNodeAddStr (&clist, 0, str);
1078         }
1079         prefix = "; ";
1080       }
1081     }
1082     if (clist != NULL) {
1083       str = MergeFFValNodeStrs (clist);
1084       if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) {
1085         *consortP = StringSave (str);
1086       }
1087 
1088       /* optionally populate gbseq for XML-ized GenBank format */
1089 
1090       if (gbref != NULL) {
1091         gbref->consortium = StringSave (str);
1092       }
1093 
1094       str = MemFree (str);
1095       ValNodeFreeData (clist);
1096     }
1097 
1098     ValNodeFree (pidlist);
1099     ValNodeFree (conslist);
1100 
1101   } else if (alp->choice == 2 || alp->choice == 3) {
1102     for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
1103       next = vnp->next;
1104       if (next == NULL) {
1105         if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1106           if (count == 0) {
1107             prefix = NULL;
1108           } else {
1109             prefix = " and ";
1110           }
1111         }
1112       }
1113       str = MakeSingleAuthorString (format, prefix, (CharPtr) vnp->data.ptrvalue, NULL, NULL, index, gbref);
1114       if (str != NULL) {
1115         ValNodeAddStr (&head, 0, str);
1116         count++;
1117       }
1118       prefix = ", ";
1119     }
1120   }
1121 
1122   str = MergeFFValNodeStrs (head);
1123 
1124   ValNodeFreeData (head);
1125 
1126   /*
1127   AuthListFree (alp);
1128   */
1129 
1130   return str;
1131 }
1132 
/*
Compresses whitespace in a string, in place.  A run of consecutive spaces
and tabs is reduced to its first character, whitespace immediately after
'(' is removed, and the space or tab left immediately before ')' or ','
is removed as well.
*/
1138 
static void StrStripSpaces (
  CharPtr str
)

{
  CharPtr  dst;
  CharPtr  src;

  if (str == NULL) return;

  /* dst never gets ahead of src, so the string can be rewritten in place */

  src = str;
  dst = str;
  while (*src != '\0') {
    *dst = *src;

    if (*src != ' ' && *src != '\t' && *src != '(') {
      /* ordinary character, keep it and move on */
      src++;
      dst++;
      continue;
    }

    /* *dst is now a space, tab or '(' : swallow the whitespace that follows it */

    do {
      src++;
    } while (*src == ' ' || *src == '\t');

    /*
    the character just written is kept unless it is whitespace standing
    directly in front of ')' or ',' ; an opening '(' is always kept,
    which covers input such as "(   )"
    */

    if ((*src != ')' && *src != ',') || *dst == '(') {
      dst++;
    }
  }
  *dst = '\0';
}
1164 
AllCaps(CharPtr p)1165 static Boolean AllCaps (
1166   CharPtr p
1167 )
1168 
1169 {
1170   if (p == NULL) return FALSE;
1171 
1172   for (p++; p != NULL && *p != '\0'; p++) {
1173     if (IS_LOWER (*p)) return FALSE;
1174   }
1175   return TRUE;
1176 }
1177 
/*
CleanEquals replaces every double quote in the string with a single
quote, in place.  A NULL pointer is ignored.
*/

static void CleanEquals (
  CharPtr p
)

{
  if (p == NULL) return;

  while (*p != '\0') {
    if (*p == '\"') {
      *p = '\'';
    }
    p++;
  }
}
1191 
GetPubTitle(FmtType format,PubdescPtr pdp,CitSubPtr csp)1192 static CharPtr GetPubTitle (
1193   FmtType format,
1194   PubdescPtr pdp,
1195   CitSubPtr csp
1196 )
1197 
1198 {
1199   CitArtPtr        cap;
1200   CitBookPtr       cbp;
1201   CitGenPtr        cgp;
1202   Char             ch;
1203   CitPatPtr        cpp;
1204   MedlineEntryPtr  mep;
1205   CharPtr          ptr;
1206   CharPtr          title = NULL;
1207   ValNodePtr       ttl = NULL;
1208   ValNodePtr       vnp;
1209 
1210   if (csp != NULL) {
1211     if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1212       title = "Direct Submission";
1213       return StringSave (title);
1214     } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1215       return NULL;
1216     }
1217   }
1218   if (pdp == NULL) return NULL;
1219 
1220   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
1221     switch (vnp->choice) {
1222       case PUB_Gen :
1223         cgp = (CitGenPtr) vnp->data.ptrvalue;
1224         if (cgp != NULL) {
1225           if (! StringHasNoText (cgp->title)) return StringSave (cgp->title);
1226           if (! StringHasNoText (cgp->cit)) {
1227             ptr = StringStr (cgp->cit, "Title=\"");
1228             if (ptr != NULL) {
1229               title = StringSave (ptr + 7);
1230               for (ptr = title; *ptr != '\0'; ptr++) {
1231                 if (*ptr == '"') {
1232                   *ptr = '\0';
1233                   break;
1234                 }
1235               }
1236               return title;
1237             }
1238           }
1239         }
1240         break;
1241       case PUB_Sub :
1242         csp = (CitSubPtr) vnp->data.ptrvalue;
1243         if (csp != NULL) {
1244           if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1245             title = "Direct Submission";
1246             return StringSave (title);
1247           } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1248             return NULL;
1249           }
1250         }
1251         break;
1252       case PUB_Medline :
1253         mep = (MedlineEntryPtr) vnp->data.ptrvalue;
1254         if (mep != NULL) {
1255           cap = mep->cit;
1256           if (cap != NULL) {
1257             ttl = cap->title;
1258           }
1259         }
1260         break;
1261       case PUB_Article :
1262         cap = (CitArtPtr) vnp->data.ptrvalue;
1263         if (cap != NULL) {
1264           ttl = cap->title;
1265         }
1266         break;
1267       /* case PUB_Book : */
1268       case PUB_Proc :
1269       case PUB_Man :
1270         cbp = (CitBookPtr) vnp->data.ptrvalue;
1271         if (cbp != NULL) {
1272           ttl = cbp->title;
1273           if (ttl != NULL) {
1274             title = (CharPtr) ttl->data.ptrvalue;
1275             if (! StringHasNoText (title)) {
1276               title = StringSave (title);
1277               if (StringLen (title) > 3) {
1278                 ch = *title;
1279                 if (IS_LOWER (ch)) {
1280                   *title = TO_UPPER (ch);
1281                 }
1282                 ptr = title;
1283                 if (AllCaps (ptr)) {
1284                   for (ptr++; ptr != NULL && *ptr != '\0'; ptr++) {
1285                     ch = *ptr;
1286                     *ptr = TO_LOWER (ch);
1287                   }
1288                 }
1289               }
1290               return title;
1291             }
1292           }
1293         }
1294         break;
1295       case PUB_Patent :
1296         cpp = (CitPatPtr) vnp->data.ptrvalue;
1297         if (cpp != NULL) {
1298           title = cpp->title;
1299           if (! StringHasNoText (title)) {
1300             return StringSave (title);
1301           }
1302         }
1303         break;
1304       default :
1305         break;
1306     }
1307 
1308     if (ttl != NULL) {
1309       title = (CharPtr) ttl->data.ptrvalue;
1310       if (! StringHasNoText (title)) {
1311         return StringSave (title);
1312       }
1313     }
1314   }
1315 
1316   return NULL;
1317 }
1318 
/*
CleanPubTitle tidies a publication title in place.

Double quotes are first turned into single quotes by CleanEquals.  Then,
working back from the end and never touching the first three characters,
trailing spaces are removed and the scan stops at the first other
character.  If that character is a period it is removed as well, provided
it lies beyond the sixth character and is not the last of three
consecutive periods.

Positions are tracked as indexes so that no pointer outside the string is
ever formed, which the pointer arithmetic used previously did for titles
shorter than three characters.
*/

static void CleanPubTitle (
  CharPtr title
)

{
  size_t  idx;
  size_t  len;

  if (title == NULL) return;

  CleanEquals (title);

  len = StringLen (title);
  if (len < 4) return;

  for (idx = len - 1; idx > 2; idx--) {
    if (title [idx] == ' ') {
      title [idx] = '\0';
      continue;
    }
    if (title [idx] == '.' && idx > 5 &&
        (title [idx - 1] != '.' || title [idx - 2] != '.')) {
      title [idx] = '\0';
    }
    break;
  }
}
1350 
1351 /*
1352 medline type page numbering is expanded (e.g., 125-35 -> 125-135,
1353 F124-34 -> F124-F134, 12a-c -> 12a-12c).
1354 If only one page is given, this is output without a dash.
1355 Expanded numbering is validated to ensure that the
1356 first number is smaller than or equal to the second and
1357 that the first letter is less than or identical to the second
1358 (i.e., a < c).  If the input is all letters (i.e., roman numerals)
1359 this is not validated.
1360 
1361 Return values:
1362  0 : valid page numbering.
1363 -1 : invalid page numbering.
1364 */
1365 
1366 #define MAX_PAGE_DIGITS 12
1367 
FixPages(CharPtr out_pages,CharPtr in_pages)1368 static Int2 FixPages (
1369   CharPtr out_pages,
1370   CharPtr in_pages
1371 )
1372 
1373 {
1374   Boolean dash=TRUE, first_alpha;
1375   Char firstbegin[MAX_PAGE_DIGITS];
1376   Char secondbegin[MAX_PAGE_DIGITS];
1377   Char firstend[MAX_PAGE_DIGITS];
1378   Char secondend[MAX_PAGE_DIGITS];
1379   Char temp[MAX_PAGE_DIGITS];
1380   CharPtr alphabegin, numbegin = NULL, alphaend, numend = NULL, ptr, in=in_pages;
1381   Int2 diff, index, retval=0;
1382   Int2 length_nb, length_ab, length_ne, length_ae;
1383   Int4 num1=0, num2=0;
1384 
1385   if (in_pages == NULL) return retval;
1386 
1387   while (*in != '\0')
1388   {      /* Check for digits in input*/
1389     if (IS_DIGIT(*in))
1390       break;
1391     in++;
1392   }
1393 
1394   if (*in == '\0' || (in != in_pages && *(in-1) == ' '))
1395   {    /* if all letters (i.e. roman numerals), put out. */
1396     out_pages = StringCpy(out_pages, in_pages);
1397     return retval;
1398   }
1399 
1400   in = in_pages;
1401   if (IS_DIGIT(*in))
1402   {      /* Do digits come first? */
1403     first_alpha = FALSE;
1404     index=0;
1405     while (IS_DIGIT(*in) || *in == ' ')
1406     {
1407       firstbegin[index] = *in;
1408       if (*in != ' ')
1409         index++;
1410       in++;
1411       if (*in == '-')
1412         break;
1413 
1414     }
1415     firstbegin[index] = '\0';
1416     index=0;
1417     if (*in != '-')
1418     {    /* After digits look for letters. */
1419       while (IS_ALPHA(*in)  || *in == ' ')
1420       {
1421         secondbegin[index] = *in;
1422         index++;
1423         in++;
1424         if (*in == '-')
1425           break;
1426       }
1427     }
1428     secondbegin[index] = '\0';
1429     if (*in == '-')    /* if dash is not present, note */
1430       in++;
1431     else
1432       dash=FALSE;
1433     index=0;
1434     while (IS_DIGIT(*in) || *in == ' ')
1435     {      /* Look for digits.  */
1436       firstend[index] = *in;
1437       if (*in != ' ')
1438         index++;
1439       in++;
1440     }
1441     firstend[index] = '\0';
1442     index=0;
1443     if (*in != '\0')
1444     {      /* Look for letters again. */
1445       while (IS_ALPHA(*in)  || *in == ' ')
1446       {
1447         secondend[index] = *in;
1448         index++;
1449         in++;
1450       }
1451     }
1452     secondend[index] = '\0';
1453   }
1454   else
1455   {      /* Do letters come first? */
1456     first_alpha = TRUE;
1457     index=0;
1458     while (IS_ALPHA(*in) || *in == ' ')
1459     {
1460       firstbegin[index] = *in;
1461       index++;
1462       in++;
1463       if (*in == '-')
1464         break;
1465     }
1466     firstbegin[index] = '\0';
1467     index=0;
1468     if (*in != '-')
1469     {    /* After letters look for digits.   */
1470       while (IS_DIGIT(*in)  || *in == ' ')
1471       {
1472         secondbegin[index] = *in;
1473         if (*in != ' ')
1474           index++;
1475         in++;
1476         if (*in == '-')
1477           break;
1478       }
1479     }
1480     secondbegin[index] = '\0';
1481     if (*in == '-')    /* Note if dash is missing. */
1482       in++;
1483     else
1484       dash=FALSE;
1485     index=0;
1486     while (IS_ALPHA(*in) || *in == ' ')
1487     {    /* Look for letters again. */
1488       firstend[index] = *in;
1489       index++;
1490       in++;
1491     }
1492     firstend[index] = '\0';
1493     index=0;
1494     if (*in != '\0')
1495     {    /* Any digits here? */
1496       while (IS_DIGIT(*in)  || *in == ' ')
1497       {
1498         secondend[index] = *in;
1499         if (*in != ' ')
1500           index++;
1501         in++;
1502       }
1503     }
1504     secondend[index] = '\0';
1505   }
1506 
1507   if (first_alpha)
1508   {
1509     alphabegin = firstbegin;
1510     numbegin = secondbegin;
1511     alphaend = firstend;
1512     numend = secondend;
1513   }
1514   else
1515   {
1516     numbegin = firstbegin;
1517     alphabegin = secondbegin;
1518     numend = firstend;
1519     alphaend = secondend;
1520   }
1521 
1522   length_nb = StringLen(numbegin);
1523   length_ab = StringLen(alphabegin);
1524   length_ne = StringLen(numend);
1525   length_ae = StringLen(alphaend);
1526 
1527   /* If no dash, but second letters or numbers present, reject. */
1528   if (dash == FALSE)
1529   {
1530     if (length_ne != 0 || length_ae != 0)
1531       retval = -1;
1532   }
1533   /* Check for situations like "AAA-123" or "222-ABC". */
1534   if (dash == TRUE)
1535   {
1536     if (length_ne == 0 && length_ab == 0)
1537       retval = -1;
1538     else if (length_ae == 0 && length_nb == 0)
1539       retval = -1;
1540   }
1541 
1542   /* The following expands "F502-512" into "F502-F512" and
1543   checks, for entries like "12a-12c" that a > c.  "12aa-12ab",
1544   "125G-137A", "125-G137" would be rejected. */
1545   if (retval == 0)
1546   {
1547     if (length_ab > 0)
1548     {
1549       if (length_ae > 0)
1550       {
1551         if (StringCmp(alphabegin, alphaend) != 0)
1552         {
1553           if (length_ab != 1 || length_ae != 1)
1554             retval = -1;
1555           else if (*alphabegin > *alphaend)
1556             retval = -1;
1557         }
1558       }
1559       else
1560       {
1561         alphaend = alphabegin;
1562         length_ae = length_ab;
1563       }
1564     }
1565     else if (length_ae > 0)
1566       retval = -1;
1567   }
1568 
1569 /* The following expands "125-37" into "125-137".  */
1570   if (retval == 0)
1571   {
1572     if (length_nb > 0)
1573     {
1574       if (length_ne > 0)
1575       {
1576         diff = length_nb - length_ne;
1577         if (diff > 0)
1578         {
1579           index=0;
1580           while (numend[index] != '\0')
1581           {
1582             temp[index+diff] = numend[index];
1583             index++;
1584           }
1585           temp[index+diff] = numend[index];
1586           for (index=0; index<diff; index++)
1587             temp[index] = numbegin[index];
1588           index=0;
1589           while (temp[index] != '\0')
1590           {
1591             numend[index] = temp[index];
1592             index++;
1593           }
1594           numend[index] = temp[index];
1595         }
1596       }
1597       else
1598       {
1599         numend = numbegin;
1600         length_ne = length_nb;
1601       }
1602 
1603     }
1604     else if (length_ne > 0)
1605       retval = -1;
1606   /* Check that the first number is <= the second (expanded) number. */
1607     if (retval == 0)
1608     {
1609   /*    sscanf(numbegin, "%ld", &num_type);
1610       num1 = (Int4) num_type;
1611       sscanf(  numend, "%ld", &num_type);
1612       num2 = (Int4) num_type;
1613   */
1614       num1 = (Int4) atol(numbegin);
1615       num2 = (Int4) atol(numend);
1616       if (num2 < num1)
1617         retval = -1;
1618     }
1619   }
1620 
1621   if (retval == -1)
1622   {
1623     out_pages = StringCpy(out_pages, in_pages);
1624   }
1625   else
1626   {
1627     ptr = out_pages;
1628   /* Place expanded and validated page numbers into "out_pages". */
1629     if (first_alpha)
1630     {
1631       while (*alphabegin != '\0')
1632       {
1633         *ptr = *alphabegin;
1634         alphabegin++;
1635         ptr++;
1636       }
1637       while (*numbegin != '\0')
1638       {
1639         *ptr = *numbegin;
1640         numbegin++;
1641         ptr++;
1642       }
1643       if (dash == TRUE)
1644       {
1645         *ptr = '-';
1646         ptr++;
1647         while (*alphaend != '\0')
1648         {
1649           *ptr = *alphaend;
1650           alphaend++;
1651           ptr++;
1652         }
1653         while (*numend != '\0')
1654         {
1655           *ptr = *numend;
1656           numend++;
1657           ptr++;
1658         }
1659       }
1660       *ptr = '\0';
1661     }
1662     else
1663     {
1664       while (*numbegin != '\0')
1665       {
1666         *ptr = *numbegin;
1667         numbegin++;
1668         ptr++;
1669       }
1670       while (*alphabegin != '\0')
1671       {
1672         *ptr = *alphabegin;
1673         alphabegin++;
1674         ptr++;
1675       }
1676       if (dash == TRUE)
1677       {
1678         *ptr = '-';
1679         ptr++;
1680         while (*numend != '\0')
1681         {
1682           *ptr = *numend;
1683           numend++;
1684           ptr++;
1685         }
1686         while (*alphaend != '\0')
1687         {
1688           *ptr = *alphaend;
1689           alphaend++;
1690           ptr++;
1691         }
1692       }
1693       *ptr = '\0';
1694     }
1695   }
1696   return retval;
1697 }
1698 
1699 /* !!! still need to add StripParanthesis equivalent !!! */
1700 
/*
DoSup appends the supplement and issue decoration of a journal citation
to the head list as one string:

    [ (part_sup)][ (issue[ part_supi])]

The first group appears only when part_sup has text.  The second group
appears when issue or part_supi has text; inside it part_supi is always
preceded by a space.  When neither group applies an empty string is
appended.  Nothing is appended if the work buffer cannot be allocated.
*/

static void DoSup (
  ValNodePtr PNTR head,
  CharPtr issue,
  CharPtr part_sup,
  CharPtr part_supi
)

{
  CharPtr  buf;
  CharPtr  cursor;
  size_t   needed;

  /* 30 spare characters cover the parentheses, spaces and terminator */

  needed = StringLen (issue) + StringLen (part_sup) + StringLen (part_supi) + 30;
  buf = MemNew (sizeof (Char) * needed);
  if (buf == NULL) return;
  cursor = buf;

  if (! StringHasNoText (part_sup)) {
    cursor = StringMove (cursor, " (");
    cursor = StringMove (cursor, part_sup);
    cursor = StringMove (cursor, ")");
  }

  if (! (StringHasNoText (issue) && StringHasNoText (part_supi))) {
    cursor = StringMove (cursor, " (");
    if (! StringHasNoText (issue)) {
      cursor = StringMove (cursor, issue);
    }
    if (! StringHasNoText (part_supi)) {
      cursor = StringMove (cursor, " ");
      cursor = StringMove (cursor, part_supi);
    }
    cursor = StringMove (cursor, ")");
  }

  ValNodeCopyStr (head, 0, buf);
  MemFree (buf);
}
1749 
FormatCitJour(FmtType format,Boolean citArtIsoJta,CitJourPtr cjp)1750 static CharPtr FormatCitJour (
1751   FmtType format,
1752   Boolean citArtIsoJta,
1753   CitJourPtr cjp
1754 )
1755 
1756 {
1757   Char        buf [256];
1758   DatePtr     dp;
1759   Boolean     electronic_journal = FALSE;
1760   ValNodePtr  head = NULL;
1761   ImprintPtr  imp;
1762   CharPtr     issue = NULL;
1763   Char        pages [128];
1764   CharPtr     part_sup = NULL;
1765   CharPtr     part_supi = NULL;
1766   CharPtr     rsult = NULL;
1767   CharPtr     title = NULL;
1768   ValNodePtr  ttl;
1769   CharPtr     volume;
1770   Char        year [8];
1771 
1772   if (cjp == NULL) return NULL;
1773 
1774   ttl = cjp->title;
1775   if (ttl == NULL) return NULL;
1776 
1777   /* always use iso_jta title if present */
1778 
1779   while (ttl != NULL && ttl->choice != Cit_title_iso_jta) {
1780     ttl = ttl->next;
1781   }
1782 
1783   imp = cjp->imp;
1784   if (imp == NULL) return NULL;
1785 
1786   /* release mode requires iso_jta title */
1787 
1788   if (imp->pubstatus == 3 || imp->pubstatus == 10) {
1789     electronic_journal = TRUE;
1790   }
1791 
1792   if (ttl == NULL) {
1793     ttl = cjp->title;
1794     if (ttl != NULL && ttl->choice == Cit_title_name) {
1795       title = (CharPtr) ttl->data.ptrvalue;
1796       if (title != NULL && StringNCmp (title, "(er)", 4) == 0) {
1797         electronic_journal = TRUE;
1798       }
1799     }
1800     if (citArtIsoJta && (! electronic_journal)) return NULL;
1801   }
1802 
1803   dp = imp->date;
1804   year [0] = '\0';
1805   if (dp != NULL) {
1806     if (dp->data [0] == 1) {
1807       if (dp->data [1] != 0) {
1808         sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
1809       }
1810     } else if (StringDoesHaveText (dp->str) && StringCmp (dp->str, "?") != 0) {
1811       StringCpy (year, " (");
1812       StringNCat (year, dp->str, 4);
1813       StringCat (year, ")");
1814     }
1815   }
1816 
1817   if (imp->prepub == 1 || imp->prepub == 255) {
1818     sprintf (buf, "Unpublished %s", year);
1819     return StringSave (buf);
1820   }
1821 
1822   if (ttl != NULL) {
1823     title = (CharPtr) ttl->data.ptrvalue;
1824   }
1825   if (StringLen (title) < 3) return StringSave (".");
1826 
1827   /*
1828   if (imp->pubstatus == 3 || imp->pubstatus == 10) {
1829     ValNodeCopyStr (&head, 0, "(er) ");
1830   }
1831   */
1832 
1833   ValNodeCopyStr (&head, 0, title);
1834 
1835   volume = imp->volume;
1836   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1837     issue = imp->issue;
1838     part_sup = imp->part_sup;
1839     part_supi = imp->part_supi;
1840   }
1841   pages [0] = '\0';
1842   if (electronic_journal) {
1843     StringNCpy_0 (pages, imp->pages, sizeof (pages));
1844   } else {
1845     FixPages (pages, imp->pages);
1846   }
1847 
1848   if (! StringHasNoText (volume)) {
1849     AddValNodeString (&head, " ", volume, NULL);
1850   }
1851 
1852   if ((! StringHasNoText (volume)) || (! StringHasNoText (pages))) {
1853     DoSup (&head, issue, part_sup, part_supi);
1854   }
1855 
1856   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1857     if (! StringHasNoText (pages)) {
1858       AddValNodeString (&head, ", ", pages, NULL);
1859     }
1860   } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1861     if (! StringHasNoText (pages)) {
1862       AddValNodeString (&head, ":", pages, NULL);
1863     } else if (imp->prepub == 2 || (StringHasNoText (volume))) {
1864       ValNodeCopyStr (&head, 0, " 0:0-0");
1865     }
1866   }
1867 
1868   ValNodeCopyStr (&head, 0, year);
1869 
1870   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1871     if (imp->prepub == 2) {
1872       ValNodeCopyStr (&head, 0, " In press");
1873     } else if (imp->pubstatus == 10 && StringHasNoText (pages)) {
1874       ValNodeCopyStr (&head, 0, " In press");
1875     }
1876   }
1877 
1878   rsult = MergeFFValNodeStrs (head);
1879   ValNodeFreeData (head);
1880 
1881   return rsult;
1882 }
1883 
MakeAffilStr(AffilPtr afp)1884 static CharPtr MakeAffilStr (
1885   AffilPtr afp
1886 )
1887 
1888 {
1889   ValNodePtr  head = NULL;
1890   CharPtr     prefix = "";
1891   CharPtr     rsult = NULL;
1892 
1893   if (afp == NULL) return NULL;
1894 
1895   if (! StringHasNoText (afp->affil)) {
1896     ValNodeCopyStr (&head, 0, afp->affil);
1897     prefix = ", ";
1898   }
1899 
1900   if (afp->choice == 2) {
1901     if (! StringHasNoText (afp->div)) {
1902       AddValNodeString (&head, prefix, afp->div, NULL);
1903       prefix = ", ";
1904     }
1905     if (! StringHasNoText (afp->street)) {
1906       AddValNodeString (&head, prefix, afp->street, NULL);
1907       prefix = ", ";
1908     }
1909     if (! StringHasNoText (afp->city)) {
1910       AddValNodeString (&head, prefix, afp->city, NULL);
1911       prefix = ", ";
1912     }
1913     if (! StringHasNoText (afp->sub)) {
1914       AddValNodeString (&head, prefix, afp->sub, NULL);
1915       prefix = ", ";
1916     }
1917     if (! StringHasNoText (afp->country)) {
1918       AddValNodeString (&head, prefix, afp->country, NULL);
1919       prefix = ", ";
1920     }
1921   }
1922 
1923   rsult = MergeFFValNodeStrs (head);
1924   ValNodeFreeData (head);
1925 
1926   return rsult;
1927 }
1928 
GetAffil(AffilPtr afp)1929 static CharPtr GetAffil (
1930   AffilPtr afp
1931 )
1932 
1933 {
1934   Boolean need_comma=FALSE;
1935   CharPtr string=NULL, temp, ptr;
1936   Char ch;
1937   Int2 aflen=15;
1938 
1939   if (afp == NULL) return NULL;
1940   if (afp) {
1941     if (afp -> choice == 1){
1942       if (afp -> affil){
1943         aflen += StringLen(afp -> affil);
1944       }
1945     }else if (afp -> choice == 2){
1946       aflen += StringLen (afp -> affil) +
1947       StringLen (afp -> div) +
1948       StringLen (afp -> city) +
1949       StringLen (afp -> sub) +
1950       StringLen (afp -> street) +
1951       StringLen (afp -> country) + StringLen(afp->postal_code);
1952     }
1953 
1954     temp = string = MemNew(aflen);
1955 
1956     if ( afp -> choice == 1){
1957        if (afp -> affil){
1958         ptr = afp->affil;
1959         while ((*temp = *ptr) != '\0')
1960         {
1961           temp++; ptr++;
1962         }
1963        }
1964     }else if (afp -> choice == 2){
1965 
1966       if( afp -> div) {
1967         if (need_comma)
1968         {
1969           *temp = ','; temp++;
1970           *temp = ' '; temp++;
1971         }
1972         ptr = afp->div;
1973         while ((*temp = *ptr) != '\0')
1974         {
1975           temp++; ptr++;
1976         }
1977         need_comma = TRUE;
1978       }
1979 
1980       if(afp -> affil) {
1981         if (need_comma)
1982         {
1983           *temp = ','; temp++;
1984           *temp = ' '; temp++;
1985         }
1986         ptr = afp->affil;
1987         while ((*temp = *ptr) != '\0')
1988         {
1989           temp++; ptr++;
1990         }
1991         need_comma = TRUE;
1992       }
1993 
1994       if(afp -> street) {
1995         if (need_comma)
1996         {
1997           *temp = ','; temp++;
1998           *temp = ' '; temp++;
1999         }
2000         ptr = afp->street;
2001         while ((*temp = *ptr) != '\0')
2002         {
2003           temp++; ptr++;
2004         }
2005         need_comma = TRUE;
2006       }
2007 
2008       if( afp -> city) {
2009         if (need_comma)
2010         {
2011           *temp = ','; temp++;
2012           *temp = ' '; temp++;
2013         }
2014         ptr = afp->city;
2015         while ((*temp = *ptr) != '\0')
2016         {
2017           temp++; ptr++;
2018         }
2019         need_comma = TRUE;
2020       }
2021 
2022       if( afp -> sub) {
2023         if (need_comma)
2024         {
2025           *temp = ','; temp++;
2026           *temp = ' '; temp++;
2027         }
2028         ptr = afp->sub;
2029         while ((*temp = *ptr) != '\0')
2030         {
2031           temp++; ptr++;
2032         }
2033         need_comma = TRUE;
2034       }
2035 
2036       if( afp -> postal_code){
2037         *temp = ' ';
2038         temp++;
2039         ptr = afp->postal_code;
2040         while ((*temp = *ptr) != '\0')
2041         {
2042           temp++; ptr++;
2043         }
2044       }
2045 
2046       if( afp -> country){
2047         if (need_comma)
2048         {
2049           *temp = ','; temp++;
2050           *temp = ' '; temp++;
2051         }
2052         ptr = afp->country;
2053         while ((*temp = *ptr) != '\0')
2054         {
2055           temp++; ptr++;
2056         }
2057         need_comma = TRUE;
2058       }
2059     }
2060     temp++;
2061     *temp = '\0';
2062   }
2063 
2064     /* convert double quotes to single quotes */
2065 
2066     ptr = string;
2067     ch = *ptr;
2068     while (ch != '\0') {
2069       if (ch == '\"') {
2070         *ptr = '\'';
2071       }
2072       ptr++;
2073       ch = *ptr;
2074     }
2075 
2076   return string;
2077 }
2078 
GetFlatFileAffilString(AffilPtr afp)2079 NLM_EXTERN CharPtr GetFlatFileAffilString (AffilPtr afp)
2080 {
2081   return GetAffil (afp);
2082 }
2083 
2084 
FormatCitBookArt(FmtType format,CitBookPtr cbp)2085 static CharPtr FormatCitBookArt (
2086   FmtType format,
2087   CitBookPtr cbp
2088 )
2089 
2090 {
2091   AffilPtr     afp;
2092   AuthListPtr  alp;
2093   CharPtr      book_title = NULL;
2094   Char         buf [256];
2095   Char         ch;
2096   DatePtr      dp;
2097   ValNodePtr   head = NULL;
2098   ImprintPtr   imp;
2099   CharPtr      issue = NULL;
2100   ValNodePtr   names = NULL;
2101   Char         pages [128];
2102   CharPtr      part_sup = NULL;
2103   CharPtr      part_supi = NULL;
2104   CharPtr      rsult = NULL;
2105   CharPtr      str;
2106   CharPtr      title;
2107   ValNodePtr   ttl;
2108   ValNodePtr   vnp;
2109   CharPtr      volume;
2110   Char         year [8];
2111 
2112   if (cbp == NULL) return NULL;
2113 
2114   ttl = cbp->title;
2115   if (ttl == NULL) return NULL;
2116 
2117   imp = cbp->imp;
2118   if (imp == NULL) return NULL;
2119 
2120   dp = imp->date;
2121   year [0] = '\0';
2122   if (dp != NULL) {
2123     if (dp->data [0] == 1) {
2124       if (dp->data [1] != 0) {
2125         sprintf (year, "(%ld)", (long) (1900 + dp->data [1]));
2126       }
2127     } else {
2128       StringCpy (year, "(");
2129       StringNCat (year, dp->str, 4);
2130       StringNCat (year, ")", 1);
2131     }
2132   }
2133 
2134   if (imp->prepub == 1 || imp->prepub == 255) {
2135     sprintf (buf, "Unpublished %s", year);
2136     return StringSave (buf);
2137   }
2138 
2139   title = (CharPtr) ttl->data.ptrvalue;
2140   if (StringLen (title) < 3) return StringSave (".");
2141 
2142   ValNodeCopyStr (&head, 0, "(in) ");
2143 
2144   alp = cbp->authors;
2145   if (alp != NULL) {
2146     str = GetAuthorsString (format, alp, NULL, NULL, NULL);
2147     if (str != NULL) {
2148       ValNodeCopyStr (&head, 0, str);
2149       names = alp->names;
2150       if (names != NULL) {
2151         if (names->next != NULL) {
2152           ValNodeCopyStr (&head, 0, " (Eds.);");
2153         } else {
2154           ValNodeCopyStr (&head, 0, " (Ed.);");
2155         }
2156       }
2157       ValNodeCopyStr (&head, 0, "\n");
2158     }
2159     MemFree (str);
2160   }
2161 
2162   book_title = StringSaveNoNull (title);
2163   vnp = ValNodeAddStr (&head, 0, book_title);
2164   if (book_title != NULL) {
2165 
2166     /* make book title all caps */
2167 
2168     title = book_title;
2169     ch = *title;
2170     while (ch != '\0') {
2171       *title = TO_UPPER (ch);
2172       title++;
2173       ch = *title;
2174     }
2175   }
2176 
2177   volume = imp->volume;
2178   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2179     issue = imp->issue;
2180     part_sup = imp->part_sup;
2181     part_supi = imp->part_supi;
2182   }
2183   pages [0] = '\0';
2184   FixPages (pages, imp->pages);
2185 
2186   if ((! StringHasNoText (volume)) && (StringCmp (volume, "0") != 0)) {
2187     AddValNodeString (&head, ", Vol. ", volume, NULL);
2188     DoSup (&head, issue, part_sup, part_supi);
2189   }
2190 
2191   if (! StringHasNoText (pages)) {
2192     AddValNodeString (&head, ": ", pages, NULL);
2193   }
2194 
2195   if (book_title != NULL) {
2196     ValNodeCopyStr (&head, 0, ";\n");
2197   }
2198 
2199   afp = imp->pub;
2200   if (afp != NULL) {
2201     str = MakeAffilStr (afp);
2202     if (str != NULL) {
2203       ValNodeCopyStr (&head, 0, str);
2204       ValNodeCopyStr (&head, 0, " ");
2205       MemFree (str);
2206     }
2207   }
2208 
2209   AddValNodeString (&head, NULL, year, NULL);
2210 
2211   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2212     if (imp->prepub == 2) {
2213       ValNodeCopyStr (&head, 0, " In press");
2214     }
2215   }
2216 
2217   rsult = MergeFFValNodeStrs (head);
2218   ValNodeFreeData (head);
2219 
2220   return rsult;
2221 }
2222 
FormatCitBook(FmtType format,CitBookPtr cbp)2223 static CharPtr FormatCitBook (
2224   FmtType format,
2225   CitBookPtr cbp
2226 )
2227 
2228 {
2229   AffilPtr   afp;
2230   char       year[5];
2231   CharPtr    bookTitle=NULL;
2232   CharPtr    retval = NULL;
2233   CharPtr    temp;
2234   DatePtr    dp;
2235   ImprintPtr ip;
2236   int        aflen = 0;
2237   CharPtr    p;
2238   CharPtr    affilStr = NULL;
2239 
2240   /* Check parameters */
2241 
2242   if (cbp == NULL)
2243     return NULL;
2244 
2245   if ( cbp -> othertype != 0)
2246     return NULL;
2247 
2248   ip = cbp -> imp;
2249 
2250   /* Format the year */
2251 
2252   dp = ip -> date;
2253   year[0] = '\0';
2254 
2255   if ( dp -> data[0] == 1)
2256     sprintf(year,"%ld",(long) ( 1900+dp -> data[1]));
2257   else
2258     {
2259       StringNCpy( (CharPtr) year, (CharPtr) dp -> str, (size_t) 4);
2260       year[4] = '\0';
2261     }
2262 
2263   /* Get the book title */
2264 
2265   if (cbp->title)
2266     bookTitle = StringSave(cbp -> title -> data.ptrvalue);
2267 
2268   /* Get the affiliation length */
2269 
2270   if ( ip -> pub){
2271     afp = ip -> pub;
2272     aflen = StringLen(afp -> affil)+ 5;
2273     if ( afp -> choice == 2){
2274       aflen += 3 + StringLen(afp -> div);
2275       aflen += 3 + StringLen(afp -> street);
2276       aflen += 3 + StringLen(afp -> city);
2277       aflen += 3 + StringLen(afp -> sub);
2278       aflen += 3 + StringLen(afp -> country);
2279     }
2280   } else{
2281     aflen = 22;
2282   }
2283   if (ip->prepub == 2)
2284     aflen += 10;
2285 
2286   /* Create a Char String big enough to hold */
2287   /* the title, year, and affiliation.       */
2288 
2289   temp = retval = MemNew( (size_t) (30+StringLen( bookTitle)+StringLen( year) + aflen) );
2290 
2291   /* Convert the title to upper case and */
2292   /* add it to the string.               */
2293 
2294   if (bookTitle != NULL) {
2295     for ( p = bookTitle; *p; p++) {
2296       *p = TO_UPPER(*p);
2297     }
2298   }
2299 
2300   /* temp = StringMove(temp, "Book: "); */
2301   temp = StringMove(temp, "(in) ");
2302   temp = StringMove(temp, bookTitle);
2303   temp = StringMove(temp, ".");
2304 
2305   /* Add the affiliation to the string */
2306 
2307   if ( ip -> pub)
2308     {
2309       afp = ip -> pub;
2310       *temp = ' ';
2311       temp++;
2312       affilStr = MakeAffilStr(afp);
2313       temp = StringMove(temp,affilStr);
2314     }
2315 
2316   /* Add the year to the string */
2317 
2318   if (year[0] != '\0')
2319     {
2320       if (affilStr != NULL)
2321         temp = StringMove(temp," (");
2322       else
2323         temp = StringMove(temp, "(");
2324       temp = StringMove(temp, year);
2325       temp = StringMove(temp, ")");
2326     }
2327 
2328   /* If in press, add note */
2329 
2330   if (ip->prepub == 2)
2331     temp = StringMove(temp, ", In press");
2332 
2333   /* Clean up and return */
2334 
2335   if (bookTitle)
2336     MemFree(bookTitle);
2337 
2338   return retval;
2339 
2340 }
2341 
FormatThesis(FmtType format,CitBookPtr cbp)2342 static CharPtr FormatThesis (
2343   FmtType format,
2344   CitBookPtr cbp
2345 )
2346 
2347 {
2348   AffilPtr     afp;
2349   Char         ch;
2350   DatePtr      dp;
2351   ValNodePtr   head = NULL;
2352   ImprintPtr   imp;
2353   CharPtr      ptr;
2354   CharPtr      rsult = NULL;
2355   CharPtr      str;
2356   CharPtr      suffix = NULL;
2357   Char         year [8];
2358 
2359   if (cbp == NULL) return NULL;
2360   if (cbp->othertype != 2 || cbp->let_type != 3) return NULL;
2361 
2362   imp = cbp->imp;
2363   if (imp == NULL) return NULL;
2364 
2365   dp = imp->date;
2366   year [0] = '\0';
2367   if (dp != NULL) {
2368     if (dp->data [0] == 1) {
2369       if (dp->data [1] != 0) {
2370         sprintf (year, "%ld", (long) (1900 + dp->data [1]));
2371       }
2372     } else {
2373       StringNCpy (year, dp->str, (size_t) 4);
2374       year [4] = '\0';
2375     }
2376   }
2377 
2378   AddValNodeString (&head, "Thesis (", year, ")");
2379 
2380   if (imp->prepub == 2) {
2381     suffix = ", In press";
2382   }
2383 
2384   str = NULL;
2385   afp = imp->pub;
2386   if (afp != NULL) {
2387     if (afp->choice == 1) {
2388       str = StringSave (afp->affil);
2389     } else if (afp->choice == 2) {
2390       str = MakeAffilStr (afp);
2391     }
2392   }
2393 
2394   if (str != NULL) {
2395 
2396     /* convert double quotes to single quotes */
2397 
2398     ptr = str;
2399     ch = *ptr;
2400     while (ch != '\0') {
2401       if (ch == '\"') {
2402         *ptr = '\'';
2403       }
2404       ptr++;
2405       ch = *ptr;
2406     }
2407     AddValNodeString (&head, " ", str, suffix);
2408     MemFree (str);
2409   }
2410 
2411   rsult = MergeFFValNodeStrs (head);
2412   ValNodeFreeData (head);
2413 
2414   return rsult;
2415 }
2416 
FormatCitArt(FmtType format,Boolean citArtIsoJta,CitArtPtr cap)2417 static CharPtr FormatCitArt (
2418   FmtType format,
2419   Boolean citArtIsoJta,
2420   CitArtPtr cap
2421 )
2422 
2423 {
2424   CitBookPtr  cbp;
2425   CitJourPtr  cjp;
2426   CharPtr     rsult = NULL;
2427 
2428   if (cap == NULL) return NULL;
2429 
2430   switch (cap->from) {
2431     case 1 :
2432       cjp = (CitJourPtr) cap->fromptr;
2433       if (cjp != NULL) {
2434         rsult = FormatCitJour (format, citArtIsoJta, cjp);
2435       }
2436       break;
2437     case 2 :
2438       cbp = (CitBookPtr) cap->fromptr;
2439       if (cbp != NULL) {
2440         rsult = FormatCitBookArt (format, cbp);
2441       }
2442       break;
2443     case 3 :
2444       cbp = (CitBookPtr) cap->fromptr;
2445       if (cbp != NULL) {
2446         rsult = FormatCitBookArt (format, cbp);
2447       }
2448       break;
2449     default :
2450       break;
2451   }
2452 
2453   return rsult;
2454 }
2455 
FormatCitPat(FmtType format,ModType mode,CitPatPtr cpp,SeqIdPtr seqidp,IntAsn2gbJobPtr ajp)2456 static CharPtr FormatCitPat (
2457   FmtType format,
2458   ModType mode,
2459   CitPatPtr cpp,
2460   SeqIdPtr seqidp,
2461   IntAsn2gbJobPtr ajp
2462 )
2463 
2464 {
2465   AffilPtr       afp;
2466   AuthListPtr    alp;
2467   IdPatPtr       cit;
2468   CharPtr        consortium = NULL;
2469   Char           date [40];
2470   ValNodePtr     head = NULL;
2471   Boolean        is_us_pre_grant = FALSE;
2472   CharPtr        prefix = NULL;
2473   CharPtr        rsult = NULL;
2474   SeqIdPtr       sip;
2475   CharPtr        str;
2476   CharPtr        suffix = NULL;
2477   PatentSeqIdPtr psip;
2478   Int4           pat_seqid = 0;
2479   Char           buf[10];
2480 
2481   if (cpp == NULL) return NULL;
2482 
2483   if (StringHasNoText (cpp->number) &&
2484       StringDoesHaveText (cpp->app_number) &&
2485       StringCmp (cpp->country, "US") == 0 &&
2486       mode != RELEASE_MODE) {
2487     for (sip = seqidp; sip != NULL; sip = sip->next) {
2488       if (sip->choice != SEQID_PATENT) continue;
2489       psip = (PatentSeqIdPtr) sip->data.ptrvalue;
2490       if (psip == NULL) continue;
2491       cit = psip->cit;
2492       if (cit == NULL) continue;
2493       if (StringDoesHaveText (cit->app_number)) {
2494         is_us_pre_grant = TRUE;
2495       }
2496     }
2497   }
2498 
2499   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2500     if (is_us_pre_grant) {
2501       ValNodeCopyStr (&head, 0, "Pre-Grant Patent: ");
2502       suffix = " ";
2503     } else {
2504       ValNodeCopyStr (&head, 0, "Patent: ");
2505       suffix = " ";
2506     }
2507   } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2508     ValNodeCopyStr (&head, 0, "Patent number ");
2509   }
2510 
2511   if (! StringHasNoText (cpp->country)) {
2512     AddValNodeString (&head, NULL, cpp->country, suffix);
2513   }
2514 
2515   if (! StringHasNoText (cpp->number)) {
2516     if (ajp != NULL && GetWWW (ajp) && StringCmp (cpp->country, "US") == 0) {
2517       ValNodeCopyStr (&head, 0, "<a href=\"");
2518       ValNodeCopyStr (&head, 0, link_uspto);
2519       ValNodeCopyStr (&head, 0, cpp->number);
2520       ValNodeCopyStr (&head, 0, "\">");
2521       ValNodeCopyStr (&head, 0, cpp->number);
2522       ValNodeCopyStr (&head, 0, "</a>");
2523     } else {
2524       ValNodeCopyStr (&head, 0, cpp->number);
2525     }
2526   } else if (! StringHasNoText (cpp->app_number)) {
2527     if (is_us_pre_grant) {
2528       AddValNodeString (&head, NULL, cpp->app_number, NULL);
2529     } else {
2530       AddValNodeString (&head, "(", cpp->app_number, ")");
2531     }
2532   }
2533 
2534   if (! StringHasNoText (cpp->doc_type)) {
2535     AddValNodeString (&head, "-", cpp->doc_type, NULL);
2536   }
2537 
2538   /* pat_seqid test */
2539 
2540   for (sip = seqidp; sip != NULL; sip = sip->next) {
2541     if (sip->choice == SEQID_PATENT) {
2542       psip = (PatentSeqIdPtr) sip -> data.ptrvalue;
2543       if (psip != NULL) {
2544         pat_seqid = psip->seqid;
2545       }
2546     }
2547   }
2548   if (pat_seqid > 0) {
2549     if (format == EMBL_FMT) {
2550       sprintf(buf,"%s%ld%s", "/", (long) pat_seqid, ", ");
2551       ValNodeCopyStr (&head, 0, buf);
2552     } else {
2553       sprintf(buf,"%s%ld ", " ", (long) pat_seqid);
2554       ValNodeCopyStr (&head, 0, buf);
2555     }
2556   } else {
2557     ValNodeCopyStr (&head, 0, " ");
2558   }
2559 
2560   /* Date */
2561 
2562   date [0] = '\0';
2563   if (cpp->date_issue != NULL) {
2564     DateToFF (date, cpp->date_issue, FALSE);
2565   } else if (cpp->app_date != NULL) {
2566     DateToFF (date, cpp->app_date, FALSE);
2567   }
2568   if (! StringHasNoText (date)) {
2569     ValNodeCopyStr (&head, 0, date);
2570   }
2571 
2572   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2573     ValNodeCopyStr (&head, 0, ";");
2574   } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2575     ValNodeCopyStr (&head, 0, ".");
2576   }
2577 
2578   alp = cpp->authors;
2579   if (alp != NULL) {
2580     afp = alp->affil;
2581     if (afp != NULL) {
2582       suffix = NULL;
2583       if (afp->choice == 2) {
2584         suffix = ";";
2585       }
2586 
2587       /* If any of the affiliation fields are */
2588       /* non-blank, put them on a new line.   */
2589 
2590       if ((! StringHasNoText (afp->affil)) ||
2591           (! StringHasNoText (afp->street)) ||
2592           (! StringHasNoText (afp->div)) ||
2593           (! StringHasNoText (afp->city)) ||
2594           (! StringHasNoText (afp->sub)) ||
2595           (! StringHasNoText (afp->country)))
2596         ValNodeCopyStr (&head, 0, "\n");
2597 
2598       /* Write out the affiliation fields */
2599 
2600       if (! StringHasNoText (afp->affil)) {
2601         AddValNodeString (&head, NULL, afp->affil, suffix);
2602         prefix = " ";
2603       }
2604       if (! StringHasNoText (afp->street)) {
2605         AddValNodeString (&head, prefix, afp->street, ";");
2606         prefix = " ";
2607       }
2608       if (! StringHasNoText (afp->div)) {
2609         AddValNodeString (&head, prefix, afp->div, ";");
2610         prefix = " ";
2611       }
2612       if (! StringHasNoText (afp->city)) {
2613         AddValNodeString (&head, prefix, afp->city, NULL);
2614         prefix = ", ";
2615       }
2616       if (! StringHasNoText (afp->sub)) {
2617         AddValNodeString (&head, prefix, afp->sub, NULL);
2618       }
2619       if (! StringHasNoText (afp->country)) {
2620         AddValNodeString (&head, ";\n", afp->country, ";");
2621       }
2622     }
2623   }
2624 
2625   alp = cpp->assignees;
2626   if (alp != NULL) {
2627     str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
2628     afp = alp->affil;
2629     if (afp != NULL) {
2630       suffix = NULL;
2631       if (afp->choice == 2) {
2632         suffix = ";";
2633       }
2634 
2635       /* If any of the affiliation fields are */
2636       /* non-blank, put them on a new line.   */
2637 
2638       if ((! StringHasNoText (str)) ||
2639           (! StringHasNoText (consortium)) ||
2640           (! StringHasNoText (afp->affil)) ||
2641           (! StringHasNoText (afp->street)) ||
2642           (! StringHasNoText (afp->div)) ||
2643           (! StringHasNoText (afp->city)) ||
2644           (! StringHasNoText (afp->sub)) ||
2645           (! StringHasNoText (afp->country)))
2646         ValNodeCopyStr (&head, 0, "\n");
2647 
2648       if (! StringHasNoText (str)) {
2649         AddValNodeString (&head, NULL, str, ";");
2650         prefix = " ";
2651       }
2652       if (! StringHasNoText (consortium)) {
2653         AddValNodeString (&head, NULL, consortium, ";");
2654         prefix = " ";
2655       }
2656 
2657       /* Write out the affiliation fields */
2658 
2659       if (! StringHasNoText (afp->affil)) {
2660         AddValNodeString (&head, NULL, afp->affil, suffix);
2661         prefix = " ";
2662       }
2663       if (! StringHasNoText (afp->street)) {
2664         AddValNodeString (&head, prefix, afp->street, ";");
2665         prefix = " ";
2666       }
2667       if (! StringHasNoText (afp->div)) {
2668         AddValNodeString (&head, prefix, afp->div, ";");
2669         prefix = " ";
2670       }
2671       if (! StringHasNoText (afp->city)) {
2672         AddValNodeString (&head, prefix, afp->city, NULL);
2673         prefix = ", ";
2674       }
2675       if (! StringHasNoText (afp->sub)) {
2676         AddValNodeString (&head, prefix, afp->sub, NULL);
2677       }
2678       if (! StringHasNoText (afp->country)) {
2679         AddValNodeString (&head, ";\n", afp->country, ";");
2680       }
2681     }
2682     MemFree (consortium);
2683     MemFree (str);
2684   }
2685 
2686   rsult = MergeFFValNodeStrs (head);
2687   ValNodeFreeData (head);
2688 
2689   /*
2690   s_StringCleanup(rsult);
2691   */
2692 
2693   return rsult;
2694 }
2695 
FormatCitGen(FmtType format,Boolean dropBadCitGens,Boolean is_ed,Boolean noAffilOnUnpub,CitGenPtr cgp)2696 static CharPtr FormatCitGen (
2697   FmtType format,
2698   Boolean dropBadCitGens,
2699   Boolean is_ed,
2700   Boolean noAffilOnUnpub,
2701   CitGenPtr cgp
2702 )
2703 
2704 {
2705   CharPtr      affil = NULL;
2706   AuthListPtr  alp = NULL;
2707   Char         ch;
2708   DatePtr      dp;
2709   ValNodePtr   head = NULL;
2710   CharPtr      inpress = NULL;
2711   CharPtr      journal = NULL;
2712   Char         pages [128];
2713   CharPtr      prefix = NULL;
2714   CharPtr      ptr;
2715   CharPtr      rsult = NULL;
2716   Char         year [8];
2717 
2718   if (cgp == NULL) return NULL;
2719 
2720   if (cgp->journal == NULL && StringNICmp (cgp->cit, "unpublished", 11) == 0) {
2721     if (noAffilOnUnpub) {
2722 
2723       /* !!! temporarily put date in unpublished citation for QA !!! */
2724 
2725       if (dropBadCitGens && is_ed) {
2726         year [0] = '\0';
2727         dp = cgp->date;
2728         if (dp != NULL) {
2729           if (dp->data [0] == 1) {
2730             if (dp->data [1] != 0) {
2731               sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
2732             }
2733           } else {
2734             StringCpy (year, " (");
2735             StringNCat (year, dp->str, 4);
2736             StringCat (year, ")");
2737           }
2738         }
2739         AddValNodeString (&head, NULL, "Unpublished", NULL);
2740         AddValNodeString (&head, NULL, year, NULL);
2741         rsult = MergeFFValNodeStrs (head);
2742         ValNodeFreeData (head);
2743         return rsult;
2744       }
2745 
2746       /* !!! remove above section once QA against asn2ff is done !!! */
2747 
2748       return StringSave ("Unpublished");
2749     }
2750 
2751     alp = cgp->authors;
2752     if (alp != NULL) {
2753       affil = GetAffil (alp->affil);
2754       if (! StringHasNoText (affil)) {
2755         rsult = MemNew ((size_t) StringLen (affil) + (size_t) StringLen (cgp->cit) + 15);
2756         StringCpy (rsult, "Unpublished ");
2757         StringCat (rsult, affil);
2758         TrimSpacesAroundString (rsult);
2759         return rsult;
2760       }
2761     }
2762 
2763     rsult = StringSave (cgp->cit);
2764     TrimSpacesAroundString (rsult);
2765     return rsult;
2766   }
2767 
2768   year [0] = '\0';
2769   dp = cgp->date;
2770   if (dp != NULL) {
2771     if (dp->data [0] == 1) {
2772       if (dp->data [1] != 0) {
2773         sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
2774       }
2775     } else {
2776       StringCpy (year, " (");
2777       StringNCat (year, dp->str, 4);
2778       StringCat (year, ")");
2779     }
2780   }
2781 
2782   pages [0] = '\0';
2783   if (cgp->pages != NULL) {
2784     FixPages (pages, cgp->pages);
2785   }
2786 
2787   if (cgp->journal != NULL) {
2788     journal = (CharPtr) cgp->journal->data.ptrvalue;
2789   }
2790   if (cgp->cit != NULL) {
2791     ptr = StringStr (cgp->cit, "Journal=\"");
2792     if (ptr != NULL) {
2793       journal = ptr + 9;
2794     } else if (StringNICmp (cgp->cit, "submitted", 8) == 0 ||
2795                StringNICmp (cgp->cit, "unpublished", 11) == 0) {
2796 
2797       if ((! dropBadCitGens) || journal != NULL) {
2798         inpress = cgp->cit;
2799       } else {
2800         inpress = "Unpublished";
2801       }
2802     } else if (StringNICmp (cgp->cit, "Online Publication", 18) == 0 ||
2803                StringNICmp (cgp->cit, "Published Only in DataBase", 26) == 0 ||
2804                StringNICmp (cgp->cit, "In press", 8) == 0 ) {
2805       inpress = cgp->cit;
2806     } else if (StringNICmp (cgp->cit, "(er) ", 5) == 0) {
2807       journal = cgp->cit;
2808     } else if ((! dropBadCitGens) && journal == NULL) {
2809       journal = cgp->cit;
2810     }
2811   }
2812   if (journal != NULL) {
2813     journal = StringSave (journal);
2814     for (ptr = journal, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
2815       if (ch == '=' || ch == '\"') {
2816         *ptr = '\0';
2817       }
2818     }
2819     ValNodeAddStr (&head, 0, journal);
2820     prefix = " ";
2821   }
2822 
2823   if (! StringHasNoText (inpress)) {
2824     AddValNodeString (&head, prefix, inpress, NULL);
2825     prefix = " ";
2826   }
2827 
2828   if (! StringHasNoText (cgp->volume)) {
2829     AddValNodeString (&head, prefix, cgp->volume, NULL);
2830   }
2831 
2832   if (! StringHasNoText (pages)) {
2833     if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2834       AddValNodeString (&head, ", ", pages, NULL);
2835     } else if (format == EMBL_FMT) {
2836       AddValNodeString (&head, ":", pages, NULL);
2837     }
2838   }
2839 
2840   if (! StringHasNoText (year)) {
2841     AddValNodeString (&head, NULL, year, NULL);
2842   }
2843 
2844   rsult = MergeFFValNodeStrs (head);
2845   ValNodeFreeData (head);
2846 
2847   return rsult;
2848 }
2849 
FormatCitSub(FmtType format,CitSubPtr csp)2850 static CharPtr FormatCitSub (
2851   FmtType format,
2852   CitSubPtr csp
2853 )
2854 
2855 {
2856   CharPtr      affil;
2857   AffilPtr     afp;
2858   AuthListPtr  alp;
2859   Char         buf [256];
2860   Char         date [40];
2861   ValNodePtr   head = NULL;
2862   CharPtr      rsult = NULL;
2863 
2864   if (csp == NULL) return NULL;
2865 
2866   date [0] = '\0';
2867   if (csp->date != NULL) {
2868     DateToFF (date, csp->date, TRUE);
2869   }
2870   if (StringHasNoText (date)) {
2871     StringCpy (date, "\?\?-\?\?\?-\?\?\?\?");
2872   }
2873 
2874   sprintf (buf, "Submitted (%s)", date);
2875   ValNodeCopyStr (&head, 0, buf);
2876 
2877   alp = csp->authors;
2878   if (alp != NULL) {
2879     afp = alp->affil;
2880     if (afp != NULL) {
2881       affil = GetAffil (afp);
2882       Asn2gnbkCompressSpaces (affil);
2883       if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2884         if (StringNCmp(affil, " to the EMBL/GenBank/DDBJ databases.", 36) != 0) {
2885           ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
2886         } else {
2887           ValNodeCopyStr (&head, 0, " ");
2888         }
2889       } else {
2890         ValNodeCopyStr (&head, 0, " ");
2891       }
2892       ValNodeCopyStr (&head, 0, affil);
2893       MemFree (affil);
2894     } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2895       ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
2896     }
2897   }
2898 
2899   rsult = MergeFFValNodeStrs (head);
2900   ValNodeFreeData (head);
2901 
2902   return rsult;
2903 }
2904 
GetPubJournal(FmtType format,ModType mode,Boolean dropBadCitGens,Boolean is_ed,Boolean noAffilOnUnpub,Boolean citArtIsoJta,PubdescPtr pdp,CitSubPtr csp,SeqIdPtr seqidp,IndxPtr index,IntAsn2gbJobPtr ajp)2905 static CharPtr GetPubJournal (
2906   FmtType format,
2907   ModType mode,
2908   Boolean dropBadCitGens,
2909   Boolean is_ed,
2910   Boolean noAffilOnUnpub,
2911   Boolean citArtIsoJta,
2912   PubdescPtr pdp,
2913   CitSubPtr csp,
2914   SeqIdPtr seqidp,
2915   IndxPtr index,
2916   IntAsn2gbJobPtr ajp
2917 )
2918 
2919 {
2920   CitArtPtr        cap;
2921   CitBookPtr       cbp;
2922   CitGenPtr        cgp;
2923   CitPatPtr        cpp;
2924   CharPtr          journal = NULL;
2925   MedlineEntryPtr  mep;
2926   ValNodePtr       vnp;
2927 
2928   if (csp != NULL) {
2929     return FormatCitSub (format, csp);
2930   }
2931   if (pdp == NULL) return NULL;
2932 
2933   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
2934     switch (vnp->choice) {
2935       case PUB_Gen :
2936         cgp = (CitGenPtr) vnp->data.ptrvalue;
2937         if (cgp != NULL) {
2938           if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
2939             if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
2940               break; /* skip just serial number */
2941             }
2942           }
2943           journal = FormatCitGen (format, dropBadCitGens, is_ed, noAffilOnUnpub, cgp);
2944         }
2945         break;
2946       case PUB_Sub :
2947         csp = (CitSubPtr) vnp->data.ptrvalue;
2948         if (csp != NULL) {
2949           journal = FormatCitSub (format, csp);
2950         }
2951         break;
2952       case PUB_Medline :
2953         mep = (MedlineEntryPtr) vnp->data.ptrvalue;
2954         if (mep != NULL) {
2955           cap = mep->cit;
2956           if (cap != NULL) {
2957             journal = FormatCitArt (format, citArtIsoJta, cap);
2958           }
2959         }
2960         break;
2961       case PUB_Article :
2962         cap = (CitArtPtr) vnp->data.ptrvalue;
2963         if (cap != NULL) {
2964           journal = FormatCitArt (format, citArtIsoJta, cap);
2965         }
2966         break;
2967       case PUB_Book :
2968       case PUB_Proc :
2969         cbp = (CitBookPtr) vnp->data.ptrvalue;
2970         if (cbp != NULL) {
2971           journal = FormatCitBook (format, cbp);
2972         }
2973         break;
2974       case PUB_Man :
2975         cbp = (CitBookPtr) vnp->data.ptrvalue;
2976         if (cbp != NULL) {
2977           journal = FormatThesis (format, cbp);
2978         }
2979         break;
2980       case PUB_Patent :
2981         cpp = (CitPatPtr) vnp->data.ptrvalue;
2982         if (cpp != NULL) {
2983           journal = FormatCitPat (format, mode, cpp, seqidp, ajp);
2984         }
2985         break;
2986       default :
2987         break;
2988     }
2989 
2990     /* optionally populate indexes for NCBI internal database */
2991 
2992     if (index != NULL && journal != NULL) {
2993 
2994       /* skip non-informative cit-gens */
2995 
2996       if (StringNICmp (journal, "submitted", 8) == 0 ||
2997           StringNICmp (journal, "unpublished", 11) == 0 ||
2998           StringNICmp (journal, "Online Publication", 18) == 0 ||
2999           StringNICmp (journal, "Published Only in DataBase", 26) == 0) {
3000       } else {
3001         ValNodeCopyStrToHead (&(index->journals), 0, journal);
3002       }
3003     }
3004 
3005     if (journal != NULL) return journal;
3006   }
3007 
3008   return NULL;
3009 }
3010 
GetMuid(PubdescPtr pdp)3011 static Int4 GetMuid (
3012   PubdescPtr pdp
3013 )
3014 
3015 {
3016   ArticleIdPtr     aip;
3017   CitArtPtr        cap;
3018   MedlineEntryPtr  mep;
3019   ValNodePtr       vnp;
3020 
3021   if (pdp == NULL) return 0;
3022 
3023   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3024     switch (vnp->choice) {
3025       case PUB_Medline :
3026         mep = (MedlineEntryPtr) vnp->data.ptrvalue;
3027         if (mep != NULL) {
3028           return mep->uid;
3029         }
3030         break;
3031       case PUB_Muid :
3032         return vnp->data.intvalue;
3033       case PUB_Article:
3034         cap = (CitArtPtr) vnp->data.ptrvalue;
3035         if (cap!= NULL && cap->ids != NULL) {
3036           for (aip = cap->ids; aip != NULL; aip = aip->next) {
3037             if (aip->choice == ARTICLEID_MEDLINE) {
3038               return aip->data.intvalue;
3039             }
3040           }
3041         }
3042       default :
3043         break;
3044     }
3045   }
3046 
3047   return 0;
3048 }
3049 
GetPmid(PubdescPtr pdp)3050 static Int4 GetPmid (
3051   PubdescPtr pdp
3052 )
3053 
3054 {
3055   ArticleIdPtr     aip;
3056   CitArtPtr        cap;
3057   MedlineEntryPtr  mep;
3058   ValNodePtr       vnp;
3059 
3060   if (pdp == NULL) return 0;
3061 
3062   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3063     switch (vnp->choice) {
3064       case PUB_Medline :
3065         mep = (MedlineEntryPtr) vnp->data.ptrvalue;
3066         if (mep != NULL) {
3067           return mep->pmid;
3068         }
3069         break;
3070       case PUB_PMid :
3071         return vnp->data.intvalue;
3072       case PUB_Article:
3073         cap = (CitArtPtr) vnp->data.ptrvalue;
3074         if (cap!= NULL && cap->ids != NULL) {
3075           for (aip = cap->ids; aip != NULL; aip = aip->next) {
3076             if (aip->choice == ARTICLEID_PUBMED) {
3077               return aip->data.intvalue;
3078             }
3079           }
3080         }
3081       default :
3082         break;
3083     }
3084   }
3085 
3086   return 0;
3087 }
3088 
GetDOI(PubdescPtr pdp)3089 static CharPtr GetDOI (
3090   PubdescPtr pdp
3091 )
3092 
3093 {
3094   ArticleIdPtr  aip;
3095   CitArtPtr     cap;
3096   ValNodePtr    vnp;
3097 
3098   if (pdp == NULL) return 0;
3099 
3100   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3101     switch (vnp->choice) {
3102       case PUB_Article:
3103         cap = (CitArtPtr) vnp->data.ptrvalue;
3104         if (cap!= NULL && cap->ids != NULL) {
3105           for (aip = cap->ids; aip != NULL; aip = aip->next) {
3106             if (aip->choice == ARTICLEID_DOI) {
3107               return (CharPtr) aip->data.ptrvalue;
3108             }
3109           }
3110         }
3111       default :
3112         break;
3113     }
3114   }
3115 
3116   return 0;
3117 }
3118 
/*
 * CleanQualValue rewrites str in place, turning every newline, carriage
 * return, tab and double-quote character into a single space.  The length of
 * the string is unchanged.
 *
 * Returns str, or NULL when str is NULL or empty.
 */
NLM_EXTERN CharPtr CleanQualValue (
  CharPtr str
)

{
  CharPtr  cursor;

  if (str == NULL || str [0] == '\0') return NULL;

  for (cursor = str; *cursor != '\0'; cursor++) {
    switch (*cursor) {
      case '\n' :
      case '\r' :
      case '\t' :
      case '"' :
        *cursor = ' ';
        break;
      default :
        break;
    }
  }

  return str;
}
3148 
/*
 * Two-character patterns recognised by Asn2gnbkCompressSpaces.  Each value is
 * the pair packed into 16 bits as (first << 8) | second.
 */
#define twocommas ((',') << 8 | (','))
#define twospaces ((' ') << 8 | (' '))
#define twosemicolons ((';') << 8 | (';'))
#define space_comma ((' ') << 8 | (','))
#define space_bracket ((' ') << 8 | (')'))
#define bracket_space (('(') << 8 | (' '))
#define space_semicolon ((' ') << 8 | (';'))
#define comma_space ((',') << 8 | (' '))
#define semicolon_space ((';') << 8 | (' '))

/*
 * Asn2gnbkCompressSpaces tidies punctuation spacing in str, in place, and
 * returns str (NULL and empty input are returned untouched).
 *
 * Pass 1 slides a two-character window over the text:
 *   ",,"                  first comma kept, second becomes a space
 *   "  "  ";;"  " )"      first character of the pair is dropped
 *   "( "                  the space is dropped
 *   " ,"  ", "            emitted as ", " and any following run of spaces
 *                         and commas is swallowed
 *   " ;"  "; "            same, with semicolons
 *   anything else         first character copied through
 *
 * Pass 2 removes leading spaces, squeezes runs of spaces to one, and removes
 * trailing spaces.  Only the space character is affected, so leading and
 * trailing tabs and newlines survive.
 */
NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str)

{
  Char     cur;
  Char     delim;
  Char     nxt;
  Uint2    pair;
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return str;

  /* pass 1: pattern-driven rewrite; wr never gets ahead of rd */

  rd = str;
  wr = str;
  cur = *rd++;
  pair = cur;

  while (cur != '\0') {
    nxt = *rd++;
    pair = (pair << 8) | nxt;

    switch (pair) {
      case twocommas :
        *wr++ = cur;
        nxt = ' ';
        break;
      case twospaces :
      case twosemicolons :
      case space_bracket :
        /* cur is simply not copied */
        break;
      case bracket_space :
        /* stay on the bracket and forget the space just read */
        nxt = cur;
        pair = cur;
        break;
      case space_comma :
      case comma_space :
      case space_semicolon :
      case semicolon_space :
        /* whichever order they came in, emit delimiter then one space */
        delim = (cur == ' ') ? nxt : cur;
        *wr++ = delim;
        *wr++ = ' ';
        do {
          nxt = *rd++;
        } while (nxt == ' ' || nxt == delim);
        pair = nxt;
        break;
      default :
        *wr++ = cur;
        break;
    }

    cur = nxt;
  }
  *wr = '\0';

  /* pass 2: TrimSpacesAroundString but allow leading/trailing tabs/newlines */

  rd = str;
  while (*rd == ' ') {
    rd++;
  }
  wr = str;
  while (*rd != '\0') {
    cur = *rd++;
    *wr++ = cur;
    if (cur == ' ') {
      while (*rd == ' ') {
        rd++;
      }
    }
  }
  while (wr > str && wr [-1] == ' ') {
    wr--;
  }
  *wr = '\0';

  return str;
}
3288 
/*
 * StripAllSpaces deletes every space and tab character from str, compacting
 * the remaining characters toward the front of the same buffer.
 *
 * Returns str, or NULL when str is NULL or empty.
 */
NLM_EXTERN CharPtr StripAllSpaces (
  CharPtr str
)

{
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return NULL;

  wr = str;
  for (rd = str; *rd != '\0'; rd++) {
    if (*rd != ' ' && *rd != '\t') {
      *wr++ = *rd;
    }
  }
  *wr = '\0';

  return str;
}
3316 
/*
 * NULL-terminated table of remark strings.  Each entry pairs a level
 * (full / simple / unannotated) with a mode (automatic / staff_review /
 * staff_entry); the position of each entry in the array is significant to
 * any code that indexes it, so the order must not change.
 */
static CharPtr remarksText [] = {
  "full automatic",
  "full staff_review",
  "full staff_entry",
  "simple staff_review",
  "simple staff_entry",
  "simple automatic",
  "unannotated automatic",
  "unannotated staff_review",
  "unannotated staff_entry",
  NULL
};
3323 
/*
 * AddReferenceToGbseq pushes gbref onto the front of gbseq->references and
 * fills in the fields that still need values:
 *
 *   reference - the serial number from rbp as decimal text when str begins
 *               with "REFERENCE   " and an AUTHORS line, CONSRTM line, or
 *               ")\n" can be located after it; otherwise "?"
 *   journal   - "?" if not already set
 *   position  - "sites" when rbp->sites is 1 or 2, left alone when it is 3,
 *               otherwise "start..stop" ranges (1-based, joined with "; ")
 *               for each piece of the reference location, or the whole of
 *               bsp when the reference has no location
 *
 * reference and journal are then passed through CleanQualValue and
 * Asn2gnbkCompressSpaces.  Nothing is done if any argument is NULL or str
 * has no text.
 */
static void AddReferenceToGbseq (
  GBSeqPtr gbseq,
  GBReferencePtr gbref,
  CharPtr str,
  RefBlockPtr rbp,
  BioseqPtr bsp
)

{
  CharPtr         body;
  CharPtr         cut;
  CharPtr         dup;
  Int4            from;
  IntRefBlockPtr  irp;
  SeqLocPtr       loc;
  Char            numbuf [32];
  SeqLocPtr       part;
  ValNodePtr      pieces = NULL;
  CharPtr         sep;
  Int4            to;

  if (gbseq == NULL || gbref == NULL) return;
  if (StringHasNoText (str)) return;
  if (rbp == NULL || bsp == NULL) return;

  dup = StringSave (str);

  /* link in reverse order, to be reversed in slash block */

  gbref->next = gbseq->references;
  gbseq->references = gbref;

  /* now parse or make ASN required default values for remaining fields */

  if (StringNCmp (dup, "REFERENCE   ", 12) == 0) {
    body = dup + 12;
    cut = StringStr (body, "\n  AUTHORS");
    if (cut == NULL) {
      cut = StringStr (body, "\n  CONSRTM");
    }
    if (cut == NULL) {
      cut = StringStr (body, ")\n");
      if (cut != NULL) {
        cut++;
      }
    }
    if (cut != NULL) {
      /* the cut only affects the private copy; the serial number is what gets stored */
      *cut = '\0';
      sprintf (numbuf, "%d", (int) rbp->serial);
      gbref->reference = StringSave (numbuf);
    }
  }

  MemFree (dup);

  if (gbref->reference == NULL) {
    gbref->reference = StringSave ("?");
  }
  CleanQualValue (gbref->reference);
  Asn2gnbkCompressSpaces (gbref->reference);

  if (gbref->journal == NULL) {
    gbref->journal = StringSave ("?");
  }
  CleanQualValue (gbref->journal);
  Asn2gnbkCompressSpaces (gbref->journal);

  /* position field */

  if (rbp->sites == 1 || rbp->sites == 2) {
    gbref->position = StringSave ("sites");
    return;
  }
  if (rbp->sites == 3) return;

  irp = (IntRefBlockPtr) rbp;
  loc = irp->loc;

  if (loc == NULL) {
    /* no explicit location: cover the whole bioseq */
    from = 1;
    to = bsp->length;
    sprintf (numbuf, "%ld..%ld", (long) from, (long) to);
    gbref->position = StringSave (numbuf);
    return;
  }

  for (part = SeqLocFindNext (loc, NULL); part != NULL; part = SeqLocFindNext (loc, part)) {
    from = SeqLocStart (part) + 1;
    to = SeqLocStop (part) + 1;
    /* every range after the first is introduced by "; " */
    sep = (pieces == NULL) ? "" : "; ";
    sprintf (numbuf, "%s%ld..%ld", sep, (long) from, (long) to);
    ValNodeCopyStr (&pieces, 0, numbuf);
  }
  gbref->position = MergeFFValNodeStrs (pieces);
  ValNodeFreeData (pieces);
}
3422 
IsCitSub(PubdescPtr pdp,CitSubPtr csp)3423 static Boolean IsCitSub (
3424   PubdescPtr pdp,
3425   CitSubPtr csp
3426 )
3427 
3428 {
3429   ValNodePtr  vnp;
3430 
3431   if (csp != NULL) return TRUE;
3432   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3433     if (vnp->choice == PUB_Sub) return TRUE;
3434   }
3435   return FALSE;
3436 }
3437 
/*
 * FF_www_muid appends the identifier muid, as decimal text, to ffstring.
 *
 * When GetWWW (ajp) is true the number is emitted as the label of an HTML
 * anchor whose href is whatever FF_Add_NCBI_Base_URL adds for link_muid,
 * followed by the number itself; otherwise only the bare number is added.
 */
static void  FF_www_muid(
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  Int4 muid
)
{
  Char idtext[40];

  sprintf (idtext, "%ld", (long) muid);

  if (GetWWW (ajp)) {
    FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
    FF_Add_NCBI_Base_URL (ffstring, link_muid);
    FFAddTextToString (ffstring, NULL, idtext, "\">", FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, idtext, FALSE, FALSE, TILDE_IGNORE);
    FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  FFAddOneString (ffstring, idtext, FALSE, FALSE, TILDE_IGNORE);
}
3458 
GetJournalPubStatus(PubdescPtr pdp)3459 static Uint1 GetJournalPubStatus (PubdescPtr pdp)
3460 
3461 {
3462   CitArtPtr   cap;
3463   CitJourPtr  cjp;
3464   ImprintPtr  imp;
3465   ValNodePtr  vnp;
3466 
3467   if (pdp == NULL) return 0;
3468 
3469   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3470     if (vnp->choice != PUB_Article) continue;
3471     cap = (CitArtPtr) vnp->data.ptrvalue;
3472     if (cap == NULL) continue;
3473     if (cap->from != 1) continue;
3474     cjp = (CitJourPtr) cap->fromptr;
3475     if (cjp == NULL) continue;
3476     imp = cjp->imp;
3477     if (imp == NULL) continue;
3478     return imp->pubstatus;
3479   }
3480 
3481   return 0;
3482 }
3483 
FormatReferenceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)3484 NLM_EXTERN CharPtr FormatReferenceBlock (
3485   Asn2gbFormatPtr afp,
3486   BaseBlockPtr bbp
3487 )
3488 
3489 {
3490   SeqMgrAndContext   acontext;
3491   AnnotDescPtr       adp;
3492   IntAsn2gbJobPtr    ajp;
3493   AuthListPtr        alp;
3494   Asn2gbSectPtr      asp;
3495   BioseqPtr          bsp;
3496   Char               buf [150];
3497   CitArtPtr          cap;
3498   Char               ch;
3499   CitJourPtr         cjp;
3500   Boolean            citArtIsoJta;
3501   CharPtr            consortium;
3502   CitPatPtr          cpp;
3503   CitRetractPtr      crp;
3504   CitSubPtr          csp = NULL;
3505   SeqMgrDescContext  dcontext;
3506   CharPtr            doi = NULL;
3507   SeqMgrFeatContext  fcontext;
3508   Int4               gibbsq;
3509   GBReferencePtr     gbref = NULL;
3510   GBSeqPtr           gbseq;
3511   GBXrefPtr          gxp;
3512   ValNodePtr         head;
3513   Int2               i;
3514   ArticleIdPtr       ids;
3515   ImprintPtr         imp;
3516   IndxPtr            index;
3517   IntRefBlockPtr     irp;
3518   Boolean            is_ed = FALSE;
3519   size_t             len;
3520   SeqLocPtr          loc = NULL;
3521   MedlineEntryPtr    mep;
3522   Int4               muid = 0;
3523   Boolean            needsPeriod = FALSE;
3524   SeqLocPtr          nextslp;
3525   Boolean            notFound;
3526   ObjMgrDataPtr      omdp;
3527   PubdescPtr         pdp = NULL;
3528   PubdescPtr         pdpcopy = NULL;
3529   PubmedEntryPtr     pep = NULL;
3530   Int4               pmid = 0;
3531   CharPtr            prefix = NULL;
3532   Uint1              pubstatus;
3533   CharPtr            pubstatnote;
3534   RefBlockPtr        rbp;
3535   ValNodePtr         remarks = NULL;
3536   CharPtr            remprefix = NULL;
3537   SubmitBlockPtr     sbp;
3538   SeqDescrPtr        sdp;
3539   ErrSev             sev;
3540   SeqFeatPtr         sfp = NULL;
3541   SeqIdPtr           sip;
3542   SeqLocPtr          slp;
3543   SeqSubmitPtr       ssp;
3544   Int4               start;
3545   Int4               stop;
3546   CharPtr            str = NULL;
3547   Boolean            strict_isojta;
3548   CharPtr            suffix = NULL;
3549   BioseqPtr          target;
3550   CharPtr            tmp;
3551   Boolean            trailingPeriod = TRUE;
3552   ValNodePtr         vnp;
3553   StringItemPtr      ffstring, temp;
3554 
3555   if (afp == NULL || bbp == NULL) return NULL;
3556   rbp = (RefBlockPtr) bbp;
3557   ajp = afp->ajp;
3558   if (ajp == NULL) return NULL;
3559   asp = afp->asp;
3560   if (asp == NULL) return NULL;
3561   target = asp->target;
3562   bsp = asp->bsp;
3563   if (target == NULL || bsp == NULL) return NULL;
3564 
3565   /* five-column feature table uses special code for formatting */
3566 
3567   if (ajp->format == FTABLE_FMT) {
3568     irp = (IntRefBlockPtr) bbp;
3569     if (irp->loc != NULL) {
3570       if (irp->rb.pmid != 0 || irp->rb.muid != 0) {
3571         head = NULL;
3572         PrintFtableIntervals (&head, target, irp->loc, "REFERENCE", FALSE);
3573         if (irp->rb.pmid != 0) {
3574           sprintf (buf, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid);
3575           ValNodeCopyStr (&head, 0, buf);
3576         } else if (irp->rb.muid != 0) {
3577           sprintf (buf, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid);
3578           ValNodeCopyStr (&head, 0, buf);
3579         }
3580         str = MergeFFValNodeStrs (head);
3581         ValNodeFreeData (head);
3582       }
3583     }
3584     return str;
3585   }
3586 
3587   /* otherwise do regular flatfile formatting */
3588 
3589   ffstring = FFGetString(ajp);
3590   if ( ffstring == NULL ) return NULL;
3591 
3592   if (ajp->index) {
3593     index = &asp->index;
3594   } else {
3595     index = NULL;
3596   }
3597 
3598   if (ajp->gbseq) {
3599     gbseq = &asp->gbseq;
3600   } else {
3601     gbseq = NULL;
3602   }
3603 
3604   if (! StringHasNoText (rbp->string)) return StringSave (rbp->string);
3605 
3606   /* could be descriptor, feature, annotdesc, or submit block citation */
3607 
3608   if (rbp->itemtype == OBJ_SEQDESC) {
3609 
3610     sdp = SeqMgrGetDesiredDescriptor (rbp->entityID, NULL, rbp->itemID, 0, NULL, &dcontext);
3611     if (sdp != NULL && dcontext.seqdesctype == Seq_descr_pub) {
3612       pdp = (PubdescPtr) sdp->data.ptrvalue;
3613     }
3614 
3615   } else if (rbp->itemtype == OBJ_SEQFEAT) {
3616 
3617     sfp = SeqMgrGetDesiredFeature (rbp->entityID, NULL, rbp->itemID, 0, NULL, &fcontext);
3618     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_PUB) {
3619       pdp = (PubdescPtr) sfp->data.value.ptrvalue;
3620     }
3621 
3622   } else if (rbp->itemtype == OBJ_ANNOTDESC) {
3623 
3624     adp = SeqMgrGetDesiredAnnotDesc (rbp->entityID, NULL, rbp->itemID, &acontext);
3625     if (adp != NULL && acontext.annotdesctype == Annot_descr_pub) {
3626       pdp = (PubdescPtr) adp->data.ptrvalue;
3627     }
3628 
3629   } else if (rbp->itemtype == OBJ_SEQSUB_CIT) {
3630 
3631     omdp = ObjMgrGetData (rbp->entityID);
3632     if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
3633       ssp = (SeqSubmitPtr) omdp->dataptr;
3634       if (ssp != NULL && ssp->datatype == 1) {
3635         sbp = ssp->sub;
3636         if (sbp != NULL) {
3637           csp = sbp->cit;
3638         }
3639       }
3640     }
3641   }
3642 
3643   if (pdp == NULL && csp == NULL) return NULL;
3644 
3645   temp = FFGetString(ajp);
3646   if ( temp == NULL ) {
3647     FFRecycleString(ajp, ffstring);
3648     return NULL;
3649   }
3650 
3651   /* any justuids left at this point is RefSeq protein, and should be fetched */
3652 
3653   irp = (IntRefBlockPtr) rbp;
3654   if (irp->justuids) {
3655     sev = ErrSetMessageLevel (SEV_MAX);
3656     if (rbp->pmid != 0) {
3657       pep = GetPubMedForUid (rbp->pmid);
3658     } else if (rbp->muid != 0) {
3659       pep = GetPubMedForUid (rbp->muid);
3660     }
3661     ErrSetMessageLevel (sev);
3662     if (pep != NULL) {
3663       mep = (MedlineEntryPtr) pep->medent;
3664       if (mep != NULL && mep->cit != NULL) {
3665         pdpcopy = AsnIoMemCopy ((Pointer) pdp,
3666                                  (AsnReadFunc) PubdescAsnRead,
3667                                  (AsnWriteFunc) PubdescAsnWrite);
3668         cap = AsnIoMemCopy ((Pointer) mep->cit,
3669                             (AsnReadFunc) CitArtAsnRead,
3670                             (AsnWriteFunc) CitArtAsnWrite);
3671         vnp = ValNodeAddPointer (&(pdpcopy->pub), PUB_Article, (Pointer) cap);
3672         pdp = pdpcopy;
3673       }
3674     }
3675   }
3676 
3677   /* print serial number */
3678   FFStartPrint(temp, afp->format, 0, 12, "REFERENCE", 12, 5, 5, "RN", TRUE);
3679 
3680   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3681     if (rbp->serial > 99) {
3682       sprintf (buf, "%d ", (int) rbp->serial);
3683     } else {
3684       sprintf (buf, "%d", (int) rbp->serial);
3685     }
3686   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3687     sprintf (buf, "[%d]", (int) rbp->serial);
3688   }
3689 
3690   FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
3691 
3692   /* print base range */
3693 
3694   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3695 
3696     if (rbp->sites != 3) {
3697       FFAddNChar(temp, ' ', 15 - temp->pos, FALSE);
3698     }
3699   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3700 
3701     if (rbp->sites == 0) {
3702       FFLineWrap(ajp, ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN");
3703       FFRecycleString(ajp, temp);
3704       temp = FFGetString(ajp);
3705       FFStartPrint(temp, afp->format, 0, 0, NULL, 0, 5, 5, "RP", FALSE);
3706     }
3707   }
3708 
3709   if (rbp->sites == 1 || rbp->sites == 2) {
3710 
3711     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3712       FFAddOneString (temp, "(sites)", FALSE, FALSE, TILDE_TO_SPACES);
3713       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3714     } else {
3715       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3716     }
3717   } else if (rbp->sites == 3) {
3718     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3719       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3720     } else {
3721       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3722     }
3723   } else {
3724     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3725       FFAddNChar(temp, ' ', 15 - temp->pos, FALSE);
3726       if (afp->format == GENBANK_FMT) {
3727         FFAddOneString (temp, "(bases ", FALSE, FALSE, TILDE_TO_SPACES);
3728       } else {
3729         FFAddOneString (temp, "(residues ", FALSE, FALSE, TILDE_TO_SPACES);
3730       }
3731     }
3732 
3733     irp = (IntRefBlockPtr) rbp;
3734     loc = irp->loc;
3735 
3736     if (loc != NULL) {
3737       if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3738         suffix = "; ";
3739       } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3740         suffix = ", ";
3741       }
3742 
3743       slp = SeqLocFindNext (loc, NULL);
3744       while (slp != NULL) {
3745         nextslp = SeqLocFindNext (loc, slp);
3746         start = SeqLocStart (slp) + 1;
3747         stop = SeqLocStop (slp) + 1;
3748         if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3749           sprintf (buf, "%ld to %ld", (long) start, (long) stop);
3750         } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3751           sprintf (buf, "%ld-%ld", (long) start, (long) stop);
3752         }
3753         if (nextslp == NULL) {
3754           suffix = NULL;
3755         }
3756         FFAddTextToString (temp, NULL, buf, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3757         slp = nextslp;
3758       }
3759 
3760     } else {
3761 
3762       /* code still used for ssp->cit */
3763 
3764       start = 1;
3765       stop = bsp->length;
3766       if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3767         sprintf (buf, "%ld to %ld", (long) start, (long) stop);
3768       } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3769         sprintf (buf, "%ld-%ld", (long) start, (long) stop);
3770       }
3771       FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
3772     }
3773 
3774     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3775       FFAddOneString (temp, ")", FALSE, FALSE, TILDE_TO_SPACES);
3776     }
3777     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3778       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3779     } else {
3780       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3781     }
3782   }
3783 
3784   if (gbseq != NULL) {
3785     gbref = GBReferenceNew ();
3786   }
3787 
3788   /* print author list */
3789 
3790   str = NULL;
3791   consortium = NULL;
3792 
3793   alp = GetAuthListPtr (pdp, csp);
3794   if (alp != NULL) {
3795     str = GetAuthorsString (afp->format, alp, &consortium, index, gbref);
3796     TrimSpacesAroundString (str);
3797     Asn2gnbkCompressSpaces (str);
3798     if (StringCmp (str, "?") == 0) {
3799       str = MemFree (str);
3800     }
3801   }
3802 
3803   if (str != NULL || StringHasNoText (consortium)) {
3804     FFRecycleString(ajp, temp);
3805     temp = FFGetString(ajp);
3806     FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE);
3807 
3808     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3809       suffix = NULL;
3810       trailingPeriod = TRUE;
3811     } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3812       trailingPeriod = FALSE;
3813       len = StringLen (str);
3814       if (len > 0 && str != NULL && str [len - 1] != '.') {
3815         suffix = ".;";
3816       } else {
3817         suffix = ";";
3818       }
3819     }
3820 
3821     /* if no authors were found, period will still be added by this call */
3822     if (str != NULL) {
3823       FFAddTextToString (temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES);
3824     } else if (StringHasNoText (consortium)) {
3825       if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3826         FFAddOneChar(temp, '.', FALSE);
3827       } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3828         FFAddOneChar(temp, ';', FALSE);
3829       }
3830     }
3831 
3832     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3833       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3834     } else {
3835       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA");
3836     }
3837   }
3838   MemFree (str);
3839 
3840   /* print consortium */
3841 
3842   FFRecycleString(ajp, temp);
3843   temp = FFGetString(ajp);
3844   if (! StringHasNoText (consortium)) {
3845     FFStartPrint (temp, afp->format, 2, 12, "CONSRTM", 12, 5, 5, "RG", FALSE);
3846     FFAddTextToString (temp, NULL, consortium, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3847     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3848       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3849     } else {
3850       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG");
3851     }
3852   }
3853   MemFree (consortium);
3854 
3855   /* print title */
3856   FFRecycleString(ajp, temp);
3857   temp = FFGetString(ajp);
3858 
3859   str = GetPubTitle (afp->format, pdp, csp);
3860   CleanPubTitle (str);
3861   StrStripSpaces (str);
3862 
3863   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3864     prefix = NULL;
3865     suffix = NULL;
3866   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3867     if (str != NULL) {
3868       prefix = "\"";
3869       suffix = "\";";
3870     } else {
3871       prefix = NULL;
3872       suffix = ";";
3873     }
3874   }
3875 
3876   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3877     if (! StringHasNoText (str)) {
3878       FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
3879 
3880       FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3881       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3882     }
3883   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3884     FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
3885     if (! StringHasNoText (str)) {
3886 
3887       FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3888 
3889     } else {
3890       FFAddOneChar (temp, ';', FALSE);
3891     }
3892     FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT");
3893   }
3894 
3895   if (gbseq != NULL) {
3896     if (gbref != NULL) {
3897       gbref->title = StringSaveNoNull (str);
3898     }
3899   }
3900 
3901   MemFree (str);
3902 
3903   /* print journal */
3904   FFRecycleString(ajp, temp);
3905   temp = FFGetString(ajp);
3906 
3907   FFStartPrint (temp, afp->format, 2, 12, "JOURNAL", 12, 5, 5, "RL", FALSE);
3908 
3909   /* Only GenBank/EMBL/DDBJ require ISO JTA in ENTREZ/RELEASE modes (RefSeq should later) */
3910 
3911   citArtIsoJta = ajp->flags.citArtIsoJta;
3912   strict_isojta = FALSE;
3913   for (sip = bsp->id; sip != NULL; sip = sip->next) {
3914     if (sip->choice == SEQID_GENBANK ||
3915         sip->choice == SEQID_EMBL ||
3916         sip->choice == SEQID_DDBJ ||
3917         /* sip->choice == SEQID_OTHER || */
3918         sip->choice == SEQID_TPG ||
3919         sip->choice == SEQID_TPE ||
3920         sip->choice == SEQID_TPD) {
3921       strict_isojta = TRUE;
3922     }
3923     if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
3924       is_ed = TRUE;
3925     }
3926   }
3927   if (! strict_isojta) {
3928     citArtIsoJta = FALSE;
3929   }
3930 
3931   str = GetPubJournal (afp->format, ajp->mode, ajp->flags.dropBadCitGens,
3932                        is_ed, ajp->flags.noAffilOnUnpub, citArtIsoJta,
3933                        pdp, csp, bsp->id, index, ajp);
3934   if (str == NULL) {
3935     str = StringSave ("Unpublished");
3936   }
3937   StrStripSpaces (str);
3938   TrimSpacesAroundString (str);
3939   Asn2gnbkCompressSpaces (str);
3940 
3941   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3942     needsPeriod = FALSE;
3943   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3944     if (! IsCitSub (pdp, csp)) {
3945       needsPeriod = TRUE;
3946     }
3947   }
3948 
3949   FFAddOneString (temp, str, FALSE, FALSE, TILDE_IGNORE);
3950   if (needsPeriod) {
3951     FFAddOneChar(temp, '.', FALSE);
3952   }
3953 
3954   if (gbseq != NULL) {
3955     if (gbref != NULL) {
3956       gbref->journal = StringSaveNoNull (str);
3957       tmp = gbref->journal;
3958       if (tmp != NULL) {
3959         ch = *tmp;
3960         while (ch != '\0') {
3961           if (ch == '\n' || ch == '\r' || ch == '\t') {
3962             *tmp = ' ';
3963           }
3964           tmp++;
3965           ch = *tmp;
3966         }
3967         TrimSpacesAroundString (gbref->journal);
3968       }
3969     }
3970   }
3971 
3972   MemFree (str);
3973   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3974     FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3975   } else {
3976     FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL");
3977   }
3978 
3979   if (gbseq != NULL) {
3980     if (gbref != NULL) {
3981       if (pdp != NULL) {
3982         for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3983           if (vnp->choice == PUB_Article) {
3984             cap = (CitArtPtr) vnp->data.ptrvalue;
3985             if (cap != NULL) {
3986               for (ids = cap->ids; ids != NULL; ids = ids->next) {
3987                 if (ids->choice == ARTICLEID_DOI) {
3988                   tmp = (CharPtr) ids->data.ptrvalue;
3989                   if (StringDoesHaveText (tmp) && StringNCmp (tmp, "10.", 3) == 0) {
3990                     gxp = GBXrefNew ();
3991                     if (gxp != NULL) {
3992                       gxp->dbname = StringSave ("doi");
3993                       gxp->id = StringSave (tmp);
3994                       gxp->next = gbref->xref;
3995                       gbref->xref = gxp;
3996                     }
3997                   }
3998                 }
3999               }
4000             }
4001           }
4002         }
4003       }
4004     }
4005   }
4006 
4007   /* print muid */
4008   FFRecycleString(ajp, temp);
4009   temp = FFGetString(ajp);
4010 
4011   pmid = GetPmid (pdp);
4012   muid = GetMuid (pdp);
4013 
4014   if (pmid == 0 && muid > 0) {
4015     FFStartPrint (temp, afp->format, 2, 12, "MEDLINE", 12, 5, 5, "RX", FALSE);
4016 
4017     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4018       FF_www_muid (ajp, temp, muid);
4019       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4020     } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4021       sprintf (buf, "MEDLINE; %ld.", (long) muid);
4022       FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
4023       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
4024     }
4025   }
4026 
4027   FFRecycleString(ajp, temp);
4028   temp = FFGetString(ajp);
4029 
4030   if (pmid > 0) {
4031     FFStartPrint (temp, afp->format, 3, 12, "PUBMED", 12, 5, 5, "RX", FALSE);
4032     if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4033       FF_www_muid (ajp, temp, pmid);
4034       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4035     } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4036       sprintf (buf, "PUBMED; %ld.", (long) pmid);
4037       FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
4038       FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
4039     }
4040   }
4041   FFRecycleString(ajp, temp);
4042 
4043   if (gbseq != NULL) {
4044     if (gbref != NULL) {
4045       gbref->pubmed = pmid;
4046     }
4047   }
4048 
4049   if (pdp == NULL) {
4050 
4051     if (csp != NULL) {
4052       if (! StringHasNoText (csp->descr)) {
4053         FFRecycleString(ajp, temp);
4054         temp = FFGetString(ajp);
4055 
4056         ValNodeCopyStr (&remarks, 0, csp->descr);
4057         FFStartPrint (temp, afp->format, 2, 12, "REMARK", 12, 5, 5, NULL, FALSE);
4058         /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
4059         AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
4060         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4061       }
4062     }
4063 
4064     str = FFToCharPtr(ffstring);
4065 
4066     if (gbseq != NULL) {
4067       if (gbref != NULL) {
4068         AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
4069       }
4070     }
4071 
4072     FFRecycleString(ajp, ffstring);
4073     FFRecycleString(ajp, temp);
4074     if (pep != NULL) {
4075       PubmedEntryFree (pep);
4076     }
4077     if (pdpcopy != NULL) {
4078       PubdescFree (pdpcopy);
4079     }
4080 
4081     return str;
4082   }
4083 
4084 
4085   /* !!! remainder of fields are only for GenBank !!! */
4086 
4087   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4088 
4089     prefix = "REMARK";
4090 
4091     cpp = NULL;
4092     for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4093       if (vnp->choice == PUB_Patent) {
4094         cpp = (CitPatPtr) vnp->data.ptrvalue;
4095       }
4096     }
4097     if (cpp != NULL && ajp != NULL && ajp->mode == ENTREZ_MODE) {
4098       if (StringCmp (cpp->country, "US") == 0) {
4099         if (StringDoesHaveText (cpp->number)) {
4100           FFRecycleString(ajp, temp);
4101           temp = FFGetString(ajp);
4102 
4103           sprintf (buf, "CAMBIA Patent Lens: %s %s", cpp->country, cpp->number);
4104           if (remprefix != NULL) {
4105             ValNodeCopyStr (&remarks, 0, remprefix);
4106           }
4107           ValNodeCopyStr (&remarks, 0, buf);
4108           remprefix = "; ";
4109           FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4110           if (GetWWW (ajp)) {
4111             sprintf (buf, "CAMBIA Patent Lens: %s ", cpp->country);
4112             FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4113             FFAddOneString (temp, "<a href=\"", FALSE, FALSE, TILDE_EXPAND);
4114             FFAddOneString (temp, link_cambia, FALSE, FALSE, TILDE_EXPAND);
4115             FFAddOneString (temp, cpp->country, FALSE, FALSE, TILDE_EXPAND);
4116             FFAddOneString (temp, cpp->number, FALSE, FALSE, TILDE_EXPAND);
4117             FFAddOneString (temp, "#list\">", FALSE, FALSE, TILDE_EXPAND);
4118             FFAddOneString (temp, cpp->number, FALSE, FALSE, TILDE_EXPAND);
4119             FFAddOneString (temp, "</a>", FALSE, FALSE, TILDE_EXPAND);
4120           } else {
4121             FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4122           }
4123           FFLineWrap (ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4124           prefix = NULL;
4125         }
4126       }
4127     }
4128 
4129     if (pdp->comment != NULL) {
4130       for (i = 0, notFound = TRUE; notFound && remarksText [i] != NULL; i++) {
4131         if (StringCmp (pdp->comment, remarksText [i]) == 0) {
4132           notFound = FALSE;
4133         }
4134       }
4135       if (notFound) {
4136         FFRecycleString(ajp, temp);
4137         temp = FFGetString(ajp);
4138 
4139         if (remprefix != NULL) {
4140           ValNodeCopyStr (&remarks, 0, remprefix);
4141         }
4142         ValNodeCopyStr (&remarks, 0, pdp->comment);
4143         remprefix = "; ";
4144         FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4145         FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND);
4146         /* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */
4147         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4148         prefix = NULL;
4149 
4150         if (gbseq != NULL) {
4151           if (gbref != NULL) {
4152             /*
4153             gbref->remark = StringSave (pdp->comment);
4154             */
4155           }
4156         }
4157 
4158       }
4159     }
4160 
4161     gibbsq = 0;
4162     for (sip = bsp->id; sip != NULL; sip = sip->next) {
4163       if (sip->choice == SEQID_GIBBSQ) {
4164         gibbsq = sip->data.intvalue;
4165       }
4166     }
4167     csp = NULL;
4168     for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4169       if (vnp->choice == PUB_Sub) {
4170         csp = (CitSubPtr) vnp->data.ptrvalue;
4171       }
4172     }
4173     if (gibbsq > 0 /* && csp == NULL */) {
4174       FFRecycleString(ajp, temp);
4175       temp = FFGetString(ajp);
4176 
4177       sprintf (buf, "GenBank staff at the National Library of Medicine created this entry [NCBI gibbsq %ld] from the original journal article.", (long) gibbsq);
4178       if (remprefix != NULL) {
4179         ValNodeCopyStr (&remarks, 0, remprefix);
4180       }
4181       ValNodeCopyStr (&remarks, 0, buf);
4182       remprefix = "; ";
4183       FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4184       FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4185       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4186       prefix = NULL;
4187 
4188       /* gibbsq comment section (fields may be copied from degenerate pubdesc) */
4189 
4190       str = pdp->fig;
4191       if (StringHasNoText (str)) {
4192         str = irp->fig;
4193       }
4194       if (! StringHasNoText (str)) {
4195         FFRecycleString(ajp, temp);
4196         temp = FFGetString(ajp);
4197 
4198         sprintf (buf, "This sequence comes from %s", str);
4199         if (remprefix != NULL) {
4200           ValNodeCopyStr (&remarks, 0, remprefix);
4201         }
4202         ValNodeCopyStr (&remarks, 0, buf);
4203         remprefix = "; ";
4204         FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4205         FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
4206         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4207         prefix = NULL;
4208       }
4209 
4210       if (pdp->poly_a || irp->poly_a) {
4211         FFRecycleString(ajp, temp);
4212         temp = FFGetString(ajp);
4213 
4214         if (remprefix != NULL) {
4215           ValNodeCopyStr (&remarks, 0, remprefix);
4216         }
4217         ValNodeCopyStr (&remarks, 0, "Polyadenylate residues occurring in the figure were omitted from the sequence.");
4218         remprefix = "; ";
4219         FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4220         FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND);
4221         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4222         prefix = NULL;
4223       }
4224 
4225       str = pdp->maploc;
4226       if (StringHasNoText (str)) {
4227         str = irp->maploc;
4228       }
4229       if (! StringHasNoText (str)) {
4230         FFRecycleString(ajp, temp);
4231         temp = FFGetString(ajp);
4232 
4233         sprintf (buf, "Map location: %s", str);
4234         if (remprefix != NULL) {
4235           ValNodeCopyStr (&remarks, 0, remprefix);
4236         }
4237         ValNodeCopyStr (&remarks, 0, buf);
4238         remprefix = "; ";
4239         FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4240         FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
4241         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4242         prefix = NULL;
4243       }
4244 
4245     }
4246 
4247     for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4248       if (vnp->choice == PUB_Article) {
4249         cap = (CitArtPtr) vnp->data.ptrvalue;
4250         if (cap != NULL && cap->from == 1) {
4251           cjp = (CitJourPtr) cap->fromptr;
4252           if (cjp != NULL) {
4253             imp = cjp->imp;
4254             if (imp != NULL) {
4255               crp = imp->retract;
4256               if (crp != NULL) {
4257                 if (crp->type == 1) {
4258                   FFRecycleString(ajp, temp);
4259                   temp = FFGetString(ajp);
4260 
4261                   len = StringLen (crp->exp) + 30;
4262                   str = MemNew (sizeof (Char) * len);
4263                   if (str != NULL) {
4264                     StringCpy (str, "Retracted");
4265                     if (StringDoesHaveText (crp->exp)) {
4266                       StringCat (str, ":[");
4267                       StringCat (str, crp->exp);
4268                       StringCat (str, "]");
4269                     }
4270                     if (remprefix != NULL) {
4271                       ValNodeCopyStr (&remarks, 0, remprefix);
4272                     }
4273                     ValNodeCopyStr (&remarks, 0, str);
4274                     remprefix = "; ";
4275                     str = MemFree (str);
4276                   }
4277                   FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4278                   FFAddOneString (temp, "Retracted", FALSE, FALSE, TILDE_TO_SPACES);
4279                   if (StringDoesHaveText (crp->exp)) {
4280                     FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4281                   }
4282                   FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4283                   prefix = NULL;
4284                 } else if (crp->type == 3) {
4285                   FFRecycleString(ajp, temp);
4286                   temp = FFGetString(ajp);
4287 
4288                   len = StringLen (crp->exp) + 30;
4289                   str = MemNew (sizeof (Char) * len);
4290                   if (str != NULL) {
4291                     StringCpy (str, "Erratum");
4292                     if (StringDoesHaveText (crp->exp)) {
4293                       StringCat (str, ":[");
4294                       StringCat (str, crp->exp);
4295                       StringCat (str, "]");
4296                     }
4297                     if (remprefix != NULL) {
4298                       ValNodeCopyStr (&remarks, 0, remprefix);
4299                     }
4300                     ValNodeCopyStr (&remarks, 0, str);
4301                     remprefix = "; ";
4302                     str = MemFree (str);
4303                   }
4304                   FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4305                   FFAddOneString (temp, "Erratum", FALSE, FALSE, TILDE_TO_SPACES);
4306                   if (StringDoesHaveText (crp->exp)) {
4307                     FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4308                   }
4309                   FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4310                   prefix = NULL;
4311                 } else if (crp->type == 4) {
4312                   FFRecycleString(ajp, temp);
4313                   temp = FFGetString(ajp);
4314 
4315                   len = StringLen (crp->exp) + 30;
4316                   str = MemNew (sizeof (Char) * len);
4317                   if (str != NULL) {
4318                     StringCpy (str, "Correction");
4319                     if (StringDoesHaveText (crp->exp)) {
4320                       StringCat (str, " to:[");
4321                       StringCat (str, crp->exp);
4322                       StringCat (str, "]");
4323                     }
4324                     if (remprefix != NULL) {
4325                       ValNodeCopyStr (&remarks, 0, remprefix);
4326                     }
4327                     ValNodeCopyStr (&remarks, 0, str);
4328                     remprefix = "; ";
4329                     str = MemFree (str);
4330                   }
4331                   FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4332                   FFAddOneString (temp, "Correction", FALSE, FALSE, TILDE_TO_SPACES);
4333                   if (StringDoesHaveText (crp->exp)) {
4334                     FFAddTextToString (temp, " to:[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4335                   }
4336                   FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4337                   prefix = NULL;
4338                 }
4339               }
4340             }
4341           }
4342         }
4343       } else if (vnp->choice == PUB_Sub) {
4344         csp = (CitSubPtr) vnp->data.ptrvalue;
4345         if (csp != NULL) {
4346           if (! StringHasNoText (csp->descr)) {
4347             FFRecycleString(ajp, temp);
4348             temp = FFGetString(ajp);
4349 
4350             if (remprefix != NULL) {
4351               ValNodeCopyStr (&remarks, 0, remprefix);
4352             }
4353             ValNodeCopyStr (&remarks, 0, csp->descr);
4354             remprefix = "; ";
4355             FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4356             /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
4357             AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
4358             FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4359             prefix = NULL;
4360           }
4361         }
4362       }
4363     }
4364 
4365     pubstatnote = NULL;
4366     pubstatus = GetJournalPubStatus (pdp);
4367     if (pubstatus == 3) {
4368       pubstatnote = "Publication Status: Online-Only";
4369     } else if (pubstatus == 10) {
4370       pubstatnote = "Publication Status: Available-Online prior to print";
4371     }
4372     if (StringDoesHaveText (pubstatnote)) {
4373       FFRecycleString(ajp, temp);
4374       temp = FFGetString(ajp);
4375 
4376       if (remprefix != NULL) {
4377         ValNodeCopyStr (&remarks, 0, remprefix);
4378       }
4379       ValNodeCopyStr (&remarks, 0, pubstatnote);
4380       remprefix = "; ";
4381       FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4382       FFAddOneString (temp, pubstatnote, FALSE, FALSE, TILDE_EXPAND);
4383       FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4384       prefix = NULL;
4385     }
4386 
4387   }
4388 
4389     if (pmid == 0 && muid == 0) {
4390       doi = GetDOI (pdp);
4391       if (StringDoesHaveText (doi) && StringNCmp (doi, "10.", 3) == 0) {
4392         FFRecycleString(ajp, temp);
4393         temp = FFGetString(ajp);
4394 
4395         if (remprefix != NULL) {
4396           ValNodeCopyStr (&remarks, 0, remprefix);
4397         }
4398         remprefix = "; ";
4399         FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4400         if (GetWWW (ajp) && (! CommentHasSuspiciousHtml (ajp, doi))) {
4401           FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND);
4402           FFAddOneString (temp, "<a href=\"", FALSE, FALSE, TILDE_EXPAND);
4403           FFAddOneString (temp, link_doi, FALSE, FALSE, TILDE_EXPAND);
4404           FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4405           FFAddOneString (temp, "\">", FALSE, FALSE, TILDE_EXPAND);
4406           FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4407           FFAddOneString (temp, "</a>", FALSE, FALSE, TILDE_EXPAND);
4408         } else {
4409           FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND);
4410           FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4411         }
4412         FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4413         prefix = NULL;
4414       }
4415     }
4416 
4417   str = FFToCharPtr(ffstring);
4418 
4419   if (gbseq != NULL) {
4420     if (gbref != NULL) {
4421       if (remarks != NULL) {
4422         gbref->remark = MergeFFValNodeStrs (remarks);
4423       }
4424 
4425       AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
4426     }
4427   }
4428   ValNodeFreeData (remarks);
4429 
4430   FFRecycleString(ajp, ffstring);
4431   FFRecycleString(ajp, temp);
4432   if (pep != NULL) {
4433     PubmedEntryFree (pep);
4434   }
4435   if (pdpcopy != NULL) {
4436     PubdescFree (pdpcopy);
4437   }
4438 
4439   return str;
4440 }
4441 
4442 
4443