1 /* asn2gnb5.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb5.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.286 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65
66 /* URLs */
67
68
/*
 * Base URLs for outbound hyperlinks.  Nothing in this part of the file
 * reads them; presumably an identifier is appended at the point of use
 * elsewhere in the file -- confirm there before changing a value.
 */

/* PubMed record page on the NCBI site. */
69 static CharPtr link_muid = "https://www.ncbi.nlm.nih.gov/pubmed/";
70
/* USPTO patent lookup; the URL ends with the "patentnumber=" query key. */
71 static CharPtr link_uspto = "http://patft.uspto.gov/netacgi/nph-Parser?patentnumber=";
72
/* Patent Lens (Cambia) lookup; the URL ends with the "patnum=" query key. */
73 static CharPtr link_cambia = "http://www.patentlens.net/patentlens/simple.cgi?patnum=";
74
/* DOI resolver root. */
75 static CharPtr link_doi = "http://dx.doi.org/";
76
77
78 /* www utility functions */
79
GetWWW(IntAsn2gbJobPtr ajp)80 NLM_EXTERN Boolean GetWWW (IntAsn2gbJobPtr ajp) {
81 return ajp->www;
82 }
83
FiniWWW(IntAsn2gbJobPtr ajp)84 NLM_EXTERN void FiniWWW (IntAsn2gbJobPtr ajp) {
85 ajp->www = FALSE;
86 }
87
InitWWW(IntAsn2gbJobPtr ajp)88 NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp)
89 {
90 ajp->www = TRUE;
91 }
92
FF_www_featloc(StringItemPtr ffstring,CharPtr loc)93 NLM_EXTERN void FF_www_featloc(StringItemPtr ffstring, CharPtr loc)
94 {
95 CharPtr ptr;
96
97 if (loc == NULL) return;
98
99 for ( ptr = loc; *ptr != '\0'; ++ptr ) {
100 switch (*ptr) {
101 case '<' :
102 /*FFAddOneString (ffstring, "<", FALSE, FALSE, TILDE_IGNORE);*/
103 FFAddOneString (ffstring, "<", FALSE, FALSE, TILDE_IGNORE);
104 break;
105 case '>' :
106 /*FFAddOneString (ffstring, ">", FALSE, FALSE, TILDE_IGNORE);*/
107 FFAddOneString (ffstring, ">", FALSE, FALSE, TILDE_IGNORE);
108 break;
109 default:
110 FFAddOneChar(ffstring, *ptr, FALSE);
111 break;
112 }
113 }
114 }
115
116
117 /* ************** */
118
LooksLikeAccession(CharPtr accession,Int2Ptr alphaP,Int2Ptr digitP,Int2Ptr unscrP)119 static Boolean LooksLikeAccession (
120 CharPtr accession,
121 Int2Ptr alphaP,
122 Int2Ptr digitP,
123 Int2Ptr unscrP
124 )
125
126 {
127 Char ch;
128 Int2 numAlpha = 0;
129 Int2 numDigits = 0;
130 Int2 numUndersc = 0;
131 CharPtr str;
132
133 if (accession == NULL || accession [0] == '\0') return FALSE;
134
135 if (StringLen (accession) >= 16) return FALSE;
136
137 if (accession [0] < 'A' || accession [0] > 'Z') return FALSE;
138
139 str = accession;
140 if (StringNCmp (str, "NZ_", 3) == 0) {
141 str += 3;
142 }
143 ch = *str;
144 while (IS_ALPHA (ch)) {
145 numAlpha++;
146 str++;
147 ch = *str;
148 }
149 while (ch == '_') {
150 numUndersc++;
151 str++;
152 ch = *str;
153 }
154 while (IS_DIGIT (ch)) {
155 numDigits++;
156 str++;
157 ch = *str;
158 }
159 if (ch != '\0' && ch != ' ' && ch != '.') return FALSE;
160
161 if (numUndersc > 1) return FALSE;
162
163 if (alphaP != NULL) {
164 *alphaP = numAlpha;
165 }
166 if (digitP != NULL) {
167 *digitP = numDigits;
168 }
169 if (unscrP != NULL) {
170 *unscrP = numUndersc;
171 }
172
173 if (numUndersc == 0) {
174 if (numAlpha == 1 && numDigits == 5) return TRUE;
175 if (numAlpha == 2 && numDigits == 6) return TRUE;
176 if (numAlpha == 3 && numDigits == 5) return TRUE;
177 if (numAlpha == 4 && numDigits == 8) return TRUE;
178 if (numAlpha == 4 && numDigits == 9) return TRUE;
179 if (numAlpha == 5 && numDigits == 7) return TRUE;
180 } else if (numUndersc == 1) {
181 if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return FALSE;
182 if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
183 if (accession [1] == 'M' ||
184 accession [1] == 'C' ||
185 accession [1] == 'T' ||
186 accession [1] == 'P' ||
187 accession [1] == 'G' ||
188 accession [1] == 'R' ||
189 accession [1] == 'S' ||
190 accession [1] == 'W' ||
191 accession [1] == 'Z') {
192 return TRUE;
193 }
194 }
195 if (accession [0] == 'A' || accession [0] == 'Y') {
196 if (accession [1] == 'P') return TRUE;
197 }
198 }
199
200 return FALSE;
201 }
202
/*
 * UrlData pairs a db_xref database tag with the base URL used to build a
 * hyperlink for identifiers from that database.
 */
203 typedef struct dbxrefurldata {
/* database tag, matched against the db name by DbNameIsValid */
204 CharPtr tag;
/* base URL; FF_www_get_url appends an optional prefix, the identifier and an optional suffix */
205 CharPtr url;
206 } UrlData, PNTR UrlDataPtr;
207
/*
 * Table of linkable databases.
 *
 * DbNameIsValid binary-searches this array with StringICmp, so the entries
 * must stay sorted by tag in case-insensitive order; a new entry inserted
 * out of order will make lookups miss.  The final match is case-sensitive,
 * so the tag spelling here is the exact spelling that is accepted.
 */
208 static UrlData Nlm_url_base [] = {
209 {"AceView/WormGenes", "https://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q="},
210 {"AFTOL", "http://wasabi.lutzonilab.net/pub/displayTaxonInfo?aftol_id="},
211 {"AntWeb", "http://www.antweb.org/specimen.do?name="},
212 {"APHIDBASE", "http://bipaa.genouest.org/apps/grs-2.3/grs?reportID=aphidbase_transcript_report&objectID="},
213 {"ApiDB", "http://www.apidb.org/apidb/showRecord.do?name=GeneRecordClasses.ApiDBGeneRecordClass&primary_key="},
214 {"ApiDB_CryptoDB", "http://cryptodb.org/cryptodb/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=CryptoDB&source_id="},
215 {"ApiDB_PlasmoDB", "http://plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=PlasmoDB&source_id="},
216 {"ApiDB_ToxoDB", "http://toxodb.org/toxo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=ToxoDB&source_id="},
217 {"Araport", "https://www.araport.org/locus/"},
218 {"ASAP", "https://asap.genetics.wisc.edu/asap/feature_info.php?FeatureID="},
219 {"ATCC", "http://www.atcc.org/Products/All/"},
220 {"Axeldb", "http://www.dkfz-heidelberg.de/tbi/services/axeldb/clone/xenopus?name="},
221 {"BEEBASE", "http://hymenopteragenome.org/cgi-bin/gb2/gbrowse/bee_genome45/?name="},
222 {"BEETLEBASE", "http://www.beetlebase.org/cgi-bin/report.cgi?name="},
223 {"BEI", "https://www.beiresources.org/Catalog/animalViruses/"},
224 {"BGD", "http://bovinegenome.org/genepages/btau40/genes/"},
225 {"BioProject", "https://www.ncbi.nlm.nih.gov/bioproject/"},
226 {"BioSample", "https://www.ncbi.nlm.nih.gov/biosample/"},
227 {"BOLD", "http://www.boldsystems.org/connectivity/specimenlookup.php?processid="},
228 {"CCDS", "https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA="},
229 {"CDD", "https://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid="},
230 {"CGD", "http://www.candidagenome.org/cgi-bin/locus.pl?locus="},
231 {"CGNC", "http://birdgenenames.org/cgnc/GeneReport?id="},
232 {"CK", "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid="},
233 {"COG", "https://www.ncbi.nlm.nih.gov/COG/new/release/cow.cgi?cog="},
234 {"CollecTF", "http://collectf.umbc.edu/"},
235 {"dbClone", "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clone&cmd=Retrieve&list_uids="},
236 {"dbCloneLib", "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clonelib&cmd=Retrieve&list_uids="},
237 {"dbEST", "https://www.ncbi.nlm.nih.gov/nucest/"},
238 {"dbProbe", "https://www.ncbi.nlm.nih.gov/sites/entrez?db=probe&cmd=Retrieve&list_uids="},
239 {"dbSNP", "https://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs="},
240 {"dbSTS", "https://www.ncbi.nlm.nih.gov/nuccore/"},
241 {"dictyBase", "http://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid="},
242 {"DSM", "https://www.dsmz.de/catalogues/details/culture/DSM-"},
243 {"DSMZ", "https://www.dsmz.de/catalogues/details/culture/PV-"},
244 {"ECOCYC", "http://biocyc.org/ECOLI/new-image?type=GENE&object="},
245 {"EcoGene", "http://www.ecogene.org/gene/"},
246 {"ENSEMBL", "http://www.ensembl.org/id/"},
247 {"EnsemblGenomes", "http://ensemblgenomes.org/id/"},
248 {"EnsemblGenomes-Gn", "http://ensemblgenomes.org/id/"},
249 {"EnsemblGenomes-Tr", "http://ensemblgenomes.org/id/"},
250 {"FANTOM_DB", "http://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid="},
251 {"FBOL", "http://www.fungalbarcoding.org/BioloMICS.aspx?Table=Fungal%20barcodes&Fields=All&Rec="},
252 {"FLYBASE", "http://flybase.bio.indiana.edu/.bin/fbidq.html?"},
253 {"Fungorum", "http://www.indexfungorum.org/Names/NamesRecord.asp?RecordID="},
254 {"GABI", "http://www.gabipd.org/database/cgi-bin/GreenCards.pl.cgi?Mode=ShowSequence&App=ncbi&SequenceId="},
255 {"GenBank", "https://www.ncbi.nlm.nih.gov/nuccore/"},
256 {"GeneDB", "http://old.genedb.org/genedb/Search?organism=All%3A*&name="},
257 {"GeneID", "https://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=Retrieve&dopt=full_report&list_uids="},
258 {"GO", "http://amigo.geneontology.org/amigo/term/GO:"},
259 {"GOA", "http://www.ebi.ac.uk/ego/GProtein?ac="},
260 {"Greengenes", "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id="},
261 {"GRIN", "http://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?"},
262 {"H-InvDB", "http://www.h-invitational.jp"},
263 {"HGNC", "http://www.genenames.org/cgi-bin/gene_symbol_report?hgnc_id=HGNC:"},
264 {"HMP", "http://www.hmpdacc.org/catalog/grid.php?dataset=genomic&hmp_id="},
265 {"HOMD", "http://www.homd.org/"},
266 {"HPM", "http://www.humanproteomemap.org/protein.php?hpm_id="},
267 {"HPRD", "http://www.hprd.org/protein/"},
268 {"HSSP", "http://mrs.cmbi.ru.nl/m6/search?db=all&q="},
269 {"I5KNAL", "https://i5k.nal.usda.gov/"},
270 {"IKMC", "http://www.knockoutmouse.org/martsearch/project/"},
271 {"IMGT/GENE-DB", "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+"},
272 {"IMGT/HLA", "http://www.ebi.ac.uk/cgi-bin/ipd/imgt/hla/get_allele.cgi?"},
273 {"IMGT/LIGM", "http://www.imgt.org/cgi-bin/IMGTlect.jv?query=201+"},
274 {"InterimID", "https://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
275 {"InterPro", "http://www.ebi.ac.uk/interpro/ISearch?mode=ipr&query="},
276 {"IntrepidBio", "http://server1.intrepidbio.com/FeatureBrowser/gene/browse/"},
277 {"IRD", "http://www.fludb.org/brc/fluSegmentDetails.do?irdSubmissionId="},
278 {"ISD", "http://www.flu.lanl.gov/search/view_record.html?accession="},
279 {"ISFinder", "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name="},
280 {"ISHAM-ITS", "http://its.mycologylab.org/BioloMICS.aspx?Table=Sequences&ExactMatch=T&Name=MITS"},
281 {"JCM", "http://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM="},
282 {"JGIDB", "http://genome.jgi-psf.org/cgi-bin/jgrs?id="},
283 {"LocusID", "https://www.ncbi.nlm.nih.gov/LocusLink/LocRpt.cgi?l="},
284 {"MaizeGDB", "http://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?"},
285 {"MedGen", "https://www.ncbi.nlm.nih.gov/medgen/"},
286 {"MGI", "http://www.informatics.jax.org/marker/MGI:"},
287 {"MIM", "https://www.ncbi.nlm.nih.gov/omim/"},
288 {"miRBase", "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc="},
289 {"MycoBank", "http://www.mycobank.org/MycoTaxo.aspx?Link=T&Rec="},
290 {"NASONIABASE", "http://hymenopteragenome.org/cgi-bin/gbrowse/nasonia10_scaffold/?name="},
291 {"NBRC", "http://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT="},
292 {"NextDB", "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk="},
293 {"niaEST", "http://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1="},
294 {"NMPDR", "http://www.nmpdr.org/linkin.cgi?id="},
295 {"NRESTdb", "http://genome.ukm.my/nrestdb/db/single_view_est.php?id="},
296 {"OrthoMCL", "http://orthomcl.org/orthomcl/showRecord.do?name=GroupRecordClasses.GroupRecordClass&group_name="},
297 {"Osa1", "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name="},
298 {"PBmice", "http://www.idmshanghai.cn/PBmice/DetailedSearch.do?type=insert&id="},
299 {"PBR", "http://www.poxvirus.org/query.asp?web_id="},
300 {"PDB", "http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId="},
301 {"PFAM", "http://pfam.sanger.ac.uk/family?acc="},
302 {"PGN", "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier="},
303 {"Phytozome", "http://www.phytozome.net/genePage.php?db=Phytozome&crown&method=0&search=1&detail=1&searchText=locusname:"},
304 {"PomBase", "http://www.pombase.org/spombe/result/"},
305 {"PseudoCap", "http://www.pseudomonas.com/getAnnotation.do?locusID="},
306 {"RAP-DB", "http://rapdb.dna.affrc.go.jp/cgi-bin/gbrowse_details/latest?name="},
307 {"RATMAP", "http://ratmap.gen.gu.se/ShowSingleLocus.htm?accno="},
308 {"RBGE_garden", "http://data.rbge.org.uk/living/"},
309 {"RBGE_herbarium", "http://data.rbge.org.uk/herb/"},
310 {"REBASE", "http://rebase.neb.com/rebase/enz/"},
311 {"RefSeq", "https://www.ncbi.nlm.nih.gov/nuccore/"},
312 {"RFAM", "http://rfam.xfam.org/family/"},
313 {"RGD", "http://rgd.mcw.edu/rgdweb/search/search.html?term="},
314 {"RiceGenes", "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object="},
315 {"SEED", "http://www.theseed.org/linkin.cgi?id="},
316 {"SGD", "http://www.yeastgenome.org/cgi-bin/locus.fpl?sgdid="},
317 {"SGN", "http://www.sgn.cornell.edu/search/est.pl?request_type=7&request_id="},
318 {"SK-FST", "http://aafc-aac.usask.ca/fst/"},
319 {"SRPDB", "http://rnp.uthscsa.edu/rnp/SRPDB/rna/sequences/fasta/"},
320 {"SubtiList", "http://genolist.pasteur.fr/SubtiList/genome.cgi?external_query+"},
321 {"TAIR", "http://www.arabidopsis.org/servlets/TairObject?type=locus&name="},
322 {"taxon", "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?"},
323 {"TIGRFAM", "http://www.jcvi.org/cgi-bin/tigrfams/HmmReportPage.cgi?acc="},
324 {"TubercuList", "http://tuberculist.epfl.ch/quicksearch.php?gene+name="},
325 {"UniGene", "https://www.ncbi.nlm.nih.gov/sites/entrez?Db=unigene&Cmd=Search&Term="},
326 {"UniProtKB/Swiss-Prot", "http://www.uniprot.org/uniprot/"},
327 {"UniProtKB/TrEMBL", "http://www.uniprot.org/uniprot/"},
328 {"UniSTS", "https://www.ncbi.nlm.nih.gov/probe?term="},
329 {"UNITE", "http://unite.ut.ee/bl_forw.php?nimi="},
330 {"VBASE2", "http://www.dnaplot.de/vbase2/vgene.php?id="},
331 {"VBRC", "http://vbrc.org/query.asp?web_view=curation&web_id="},
332 {"VectorBase", "http://www.vectorbase.org/Genome/BRCGene/?feature="},
333 {"Vega", "http://vega.sanger.ac.uk/id/"},
334 {"VGNC", "http://vertebrate.genenames.org/data/gene-symbol-report/#!/vgnc_id/VGNC:"},
335 {"ViPR", "http://www.viprbrc.org/brc/viprStrainDetails.do?viprSubmissionId="},
336 {"VISTA", "http://enhancer.lbl.gov/cgi-bin/dbxref.pl?id="},
337 {"WorfDB", "http://worfdb.dfci.harvard.edu/search.pl?form=1&search="},
338 {"WormBase", "http://www.wormbase.org/search/gene/"},
339 {"Xenbase", "http://www.xenbase.org/gene/showgene.do?method=display&geneId="},
340 {"ZFIN", "http://zfin.org/cgi-bin/webdriver?MIval=aa-markerview.apg&OID="},
341 };
342
DbNameIsValid(CharPtr db)343 static Int2 DbNameIsValid (
344 CharPtr db
345 )
346
347 {
348 Int2 L, R, mid;
349
350 if (StringHasNoText (db)) return -1;
351
352 L = 0;
353 R = sizeof (Nlm_url_base) / sizeof (Nlm_url_base [0]);
354
355 while (L < R) {
356 mid = (L + R) / 2;
357 if (StringICmp (Nlm_url_base [mid].tag, db) < 0) {
358 L = mid + 1;
359 } else {
360 R = mid;
361 }
362 }
363
364 /* case sensitive comparison at end enforces strictness */
365
366 if (StringCmp (Nlm_url_base [R].tag, db) == 0) {
367 return R;
368 }
369
370 return -1;
371 }
372
/*
 * The http and https spellings of the NCBI site root.  FF_www_get_url and
 * FF_Add_NCBI_Base_URL compare the start of a URL against these
 * (case-insensitively) and, when the NCBI_URL_BASE configuration value is
 * set, replace the matching root with that value.
 */
373 static const Char* kNCBIUrl = "http://www.ncbi.nlm.nih.gov/";
374 static const Char* kNCBISUrl = "https://www.ncbi.nlm.nih.gov/";
375
/*
 * FF_www_get_url - append an HTML anchor for a db_xref identifier.
 *
 *   ffstring     destination string item
 *   db           database tag; must match an Nlm_url_base entry exactly
 *   identifier   identifier text; leading blanks are skipped
 *   bsp          sequence used only to pick the species for IMGT/GENE-DB;
 *                may be NULL
 *
 * Output is
 *     <a href="[base]url[prefix][ident][suffix]">[redundant]identifier</a>
 * where url starts as the table entry for db and the per-database special
 * cases below adjust url, prefix, ident, suffix and redundant.  When db is
 * not in the table the identifier is written as plain text with no link.
 * Nothing is written when ffstring is NULL or db / identifier is empty.
 *
 * The NCBI_URL_BASE configuration override is applied AFTER the special
 * cases, so a special case that substitutes another NCBI URL (RefSeq
 * proteins) is rebased too instead of ending up as base + absolute URL.
 *
 * NOTE(review): identifier text is written into the href attribute and the
 * link text without any HTML/URL escaping.
 */
static void FF_www_get_url (
  StringItemPtr ffstring,
  CharPtr db,
  CharPtr identifier,
  BioseqPtr bsp
)

{
  CharPtr  base = NULL, prefix = NULL, ident = NULL,
           suffix = NULL, url = NULL, redundant = NULL, ptr, str;
  Char     ch, buf [128], id [20], taxname [128], zeroes [16];
  Boolean  is_numeric;
  Int2     len, num_alpha, num_digit, num_unscr, R;
  size_t   ncbi_len = 0;

  if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;

  while (*identifier == ' ') {
    identifier++;
  }
  ident = identifier;

  R = DbNameIsValid (db);
  if (R < 0) {
    FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  url = Nlm_url_base [R].url;

  /* special cases */

  if (StringCmp (db, "IRD") == 0) {

    suffix = "&decorator=influenza";

  } else if (StringCmp (db, "ATCC") == 0) {

    suffix = ".aspx";

  } else if (StringCmp (db, "BEI") == 0) {

    suffix = ".aspx";

  } else if (StringCmp (db, "ViPR") == 0) {

    suffix = "&decorator=vipr";

  } else if (StringCmp (db, "SRPDB") == 0) {

    suffix = ".fasta";

  } else if (StringCmp (db, "FLYBASE") == 0) {

    if (StringStr (identifier, "FBa") != NULL ) {
      url = "http://www.fruitfly.org/cgi-bin/annot/fban?";
    }

  } else if (StringCmp (db, "Fungorum") == 0) {

    /* link on the numeric part: skip everything before the first digit */

    str = identifier;
    ch = *str;
    while (ch != '\0' && ! IS_DIGIT (ch)) {
      str++;
      ch = *str;
    }
    ident = str;

  } else if (StringCmp (db, "dictyBase") == 0) {

    if (StringChr (identifier, '_') != NULL) {
      url = "http://dictybase.org/db/cgi-bin/gene_page.pl?primary_id=";
    }

  } else if (StringCmp (db, "GDB") == 0) {

    /* "GDB" has no Nlm_url_base entry, so this branch is not reached at present */

    str = StringStr (identifier, "G00-");
    if (str != NULL) {
      /* copy what follows "G00-" with the dashes removed */
      ptr = id;
      str += 4;
      ch = *str;
      while (ch != '\0') {
        if (ch != '-') {
          if (ptr >= id + sizeof (id) - 1) {
            /* does not fit the id buffer: emit plain text rather than overflow */
            FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
            return;
          }
          *ptr = ch;
          ptr++;
        }
        str++;
        ch = *str;
      }
      *ptr = '\0';
      ident = id;
    } else {
      ch = *identifier;
      if (! IS_DIGIT (ch)) {
        FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
        return;
      }
    }

  } else if (StringCmp (db, "H-InvDB") == 0) {

    /* tag spelled as in Nlm_url_base; the "H_InvDB" spelling could never get here */

    if (StringStr (identifier, "HIT") != NULL) {
      url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR=";
    } else if (StringStr (identifier, "HIX") != NULL) {
      url = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR=";
    }

  } else if (StringCmp (db, "HOMD") == 0) {

    if (StringStr (identifier, "tax_") != NULL ) {
      url = "http://www.homd.org/taxon=";
      ident += 4;
    } else if (StringStr (identifier, "seq_") != NULL ) {
      url = "http://www.homd.org/seq=";
      ident += 4;
    }

  } else if (StringCmp (db, "IMGT/GENE-DB") == 0) {

    if (bsp != NULL && BioseqToGeneticCode (bsp, NULL, NULL, NULL, taxname, sizeof (taxname), NULL)) {
      if (StringCmp (taxname, "Homo sapiens") == 0) {
        url = "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+";
      }
      if (StringCmp (taxname, "Mus musculus") == 0) {
        url = "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Mus+musculus&query=2+";
      }
    }

  } else if (StringCmp (db, "IMGT/HLA") == 0) {

    if (StringNICmp (identifier, "HLA", 3) != 0 ) {
      url = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=imgthla;id=";
    }

  } else if (StringCmp (db, "MaizeGDB") == 0) {

    is_numeric = TRUE;
    str = identifier;
    ch = *str;
    while (ch != '\0') {
      if (! IS_DIGIT (ch)) {
        is_numeric = FALSE;
      }
      str++;
      ch = *str;
    }

    if (is_numeric) {
      prefix = "id=";
    } else {
      prefix = "term=";
    }

  } else if (StringCmp (db, "miRBase") == 0) {

    if (StringStr (identifier, "MIMAT") != NULL) {
      url = "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=";
    }

  } else if (StringCmp (db, "RefSeq") == 0) {

    /* identifiers of the form ?P_ go to the protein page instead of nuccore */

    ch = identifier [0];
    if (IS_ALPHA (ch) && identifier [1] == 'P' && identifier [2] == '_') {
      url = "https://www.ncbi.nlm.nih.gov/protein/";
    }

  } else if (StringCmp (db, "WormBase") == 0) {

    if (LooksLikeAccession (identifier, &num_alpha, &num_digit, &num_unscr) && num_alpha == 3 && num_digit == 5) {
      url = "http://www.wormbase.org/search/protein/";
    }

  } else if (StringCmp (db, "niaEST") == 0) {

    suffix = "&val=1";

  } else if (StringCmp (db, "RAP-DB") == 0) {

    suffix = ";class=locus_id";

  } else if (StringCmp (db, "REBASE") == 0) {

    suffix = ".html";

  } else if (StringCmp (db, "SK-FST") == 0) {

    /* link goes to the base URL only; the identifier is still the link text */

    ident = NULL;

  } else if (StringCmp (db, "taxon") == 0) {

    ch = *identifier;
    if (IS_DIGIT (ch)) {
      prefix = "id=";
    } else {
      prefix = "name=";
    }

  } else if (StringCmp (db, "UniSTS") == 0) {

    suffix = "%20%5BUniSTS%20ID%5D";

  } else if (StringCmp (db, "HGNC") == 0) {

    /* the table URL already ends in "HGNC:"; show the tag again in the link text */

    if (StringNCmp (identifier, "HGNC:", 5) == 0 ) {
      ident += 5;
    }
    redundant = "HGNC:";

  } else if (StringCmp (db, "VGNC") == 0) {

    if (StringNCmp (identifier, "VGNC:", 5) == 0 ) {
      ident += 5;
    }
    redundant = "VGNC:";

  } else if (StringCmp (db, "MGI") == 0) {

    if (StringNCmp (identifier, "MGI:", 4) == 0 ) {
      ident += 4;
    }
    redundant = "MGI:";

  } else if (StringCmp (db, "RGD") == 0) {

    if (StringNCmp (identifier, "RGD:", 4) == 0 ) {
      ident += 4;
    }

  } else if (StringCmp (db, "ISHAM-ITS") == 0) {

    if (StringNCmp (identifier, "MITS", 4) == 0 ) {
      ident += 4;
    }

  } else if (StringCmp (db, "NBRC") == 0) {

    /* left-pad the identifier with zeroes to eight characters */

    len = StringLen (identifier);
    if (len < 8) {
      StringCpy (zeroes, "00000000");
      prefix = zeroes + len;
    }

  }

  /* NCBI URL can be overridden by configuration file; done on the final url */

  if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
    if (StringDoesHaveText (buf)) {
      if (StringNICmp (url, kNCBIUrl, strlen (kNCBIUrl)) == 0) {
        ncbi_len = strlen (kNCBIUrl);
      } else if (StringNICmp (url, kNCBISUrl, strlen (kNCBISUrl)) == 0) {
        ncbi_len = strlen (kNCBISUrl);
      }
      if (ncbi_len > 0) {
        url += ncbi_len;
        base = buf;
      }
    }
  }

  /* now generate URL */

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (base)) {
    FFAddOneString (ffstring, base, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, url, FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (prefix)) {
    FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
  }
  if (StringDoesHaveText (ident)) {
    FFAddOneString (ffstring, ident, FALSE, FALSE, TILDE_IGNORE);
  }
  if (StringDoesHaveText (suffix)) {
    FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
  if (StringDoesHaveText (redundant)) {
    FFAddOneString (ffstring, redundant, FALSE, FALSE, TILDE_IGNORE);
  }
  FFAddOneString (ffstring, identifier, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
683
FF_www_db_xref(IntAsn2gbJobPtr ajp,StringItemPtr ffstring,CharPtr db,CharPtr identifier,BioseqPtr bsp)684 NLM_EXTERN void FF_www_db_xref (
685 IntAsn2gbJobPtr ajp,
686 StringItemPtr ffstring,
687 CharPtr db,
688 CharPtr identifier,
689 BioseqPtr bsp
690 )
691 {
692 CharPtr colon = ":";
693
694 if (ffstring == NULL || StringHasNoText (db) || StringHasNoText (identifier)) return;
695
696 if (StringCmp (db, "HGNC") == 0 && StringNCmp (identifier, "HGNC:", 5) == 0) {
697 identifier += 5;
698 } else if (StringCmp (db, "VGNC") == 0 && StringNCmp (identifier, "VGNC:", 5) == 0) {
699 identifier += 5;
700 } else if (StringCmp (db, "MGI") == 0 && StringNCmp (identifier, "MGI:", 4) == 0) {
701 identifier += 4;
702 }
703
704 if (GetWWW (ajp)) {
705 FFAddTextToString (ffstring, db, colon, NULL, FALSE, FALSE, TILDE_IGNORE);
706 FF_www_get_url (ffstring, db, identifier, bsp);
707 } else {
708 if (StringCmp (db, "HGNC") == 0) {
709 colon = ":HGNC:";
710 } else if (StringCmp (db, "VGNC") == 0) {
711 colon = ":VGNC:";
712 } else if (StringCmp (db, "MGI") == 0) {
713 colon = ":MGI:";
714 }
715 FFAddTextToString (ffstring, db, colon, identifier, FALSE, FALSE, TILDE_IGNORE);
716 }
717 }
718
FF_Add_NCBI_Base_URL(StringItemPtr ffstring,CharPtr url)719 NLM_EXTERN void FF_Add_NCBI_Base_URL (
720 StringItemPtr ffstring,
721 CharPtr url
722 )
723
724 {
725 CharPtr base = NULL;
726 Char buf [128];
727
728 if (ffstring == NULL || StringHasNoText (url)) return;
729
730 /* NCBI URL can be overridden by configuration file */
731
732 if (GetAppParam ("NCBI", "WWWENTREZ", "NCBI_URL_BASE", NULL, buf, sizeof (buf))) {
733 if (StringDoesHaveText (buf)) {
734 Uint1 len = 0;
735 if (StringNICmp (url, kNCBIUrl, strlen(kNCBIUrl)) == 0)
736 len = strlen(kNCBIUrl);
737 else if (StringNICmp (url, kNCBISUrl, strlen(kNCBISUrl)) == 0)
738 len = strlen(kNCBISUrl);
739 if (len > 0) {
740 url += len;
741 base = buf;
742 }
743 }
744 }
745
746 if (StringDoesHaveText (base)) {
747 FFAddOneString (ffstring, base, FALSE, FALSE, TILDE_IGNORE);
748 }
749 FFAddOneString (ffstring, url, FALSE, FALSE, TILDE_IGNORE);
750 }
751
752
753 /* ************** */
754
755
756 /* public function to get URLs for collaboration-approved db_xrefs */
757
758 static Boolean links_loaded = FALSE;
759
asn2gnbk_dbxref(DbtagPtr dbt)760 NLM_EXTERN CharPtr asn2gnbk_dbxref (
761 DbtagPtr dbt
762 )
763
764 {
765 IntAsn2gbJobPtr ajp;
766 Char buf [128];
767 StringItemPtr ffstring;
768 ObjectIdPtr oip;
769 CharPtr ptr;
770 CharPtr str;
771 CharPtr tmp;
772
773 if (dbt == NULL) return NULL;
774 if (StringHasNoText (dbt->db)) return NULL;
775 oip = dbt->tag;
776 if (oip == NULL) return NULL;
777
778 if (! StringHasNoText (oip->str)) {
779 if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
780 sprintf (buf, "%s", oip->str);
781 }
782 } else {
783 sprintf (buf, "%ld", (long) oip->id);
784 }
785
786 ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
787 if (ajp == NULL) return NULL;
788 ffstring = FFGetString (ajp);
789 if ( ffstring == NULL ) return NULL;
790
791 if (! links_loaded) {
792 InitWWW (ajp);
793 links_loaded = TRUE;
794 }
795 ajp->www = TRUE;
796
797 FF_www_db_xref (ajp, ffstring, dbt->db, buf, NULL);
798
799 ajp->www = FALSE;
800
801 str = FFToCharPtr (ffstring);
802
803 FFRecycleString (ajp, ffstring);
804 /*
805 MemFree (ajp);
806 */
807 asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
808
809 tmp = StringChr (str, '<');
810 if (tmp != NULL) {
811 ptr = StringSave (tmp);
812 tmp = StringChr (ptr, '>');
813 if (tmp != NULL) {
814 tmp++;
815 *tmp = '\0';
816 }
817 MemFree (str);
818 str = ptr;
819 } else {
820 str = MemFree (str);
821 }
822
823 return str;
824 }
825
826 /* format references section */
827
GetAuthListPtr(PubdescPtr pdp,CitSubPtr csp)828 NLM_EXTERN AuthListPtr GetAuthListPtr (
829 PubdescPtr pdp,
830 CitSubPtr csp
831 )
832
833 {
834 AuthListPtr alp = NULL;
835 CitArtPtr cap;
836 CitBookPtr cbp;
837 CitGenPtr cgp;
838 CitPatPtr cpp;
839 ValNodePtr vnp;
840
841 if (csp != NULL) {
842 alp = csp->authors;
843 if (alp != NULL) return alp;
844 }
845 if (pdp == NULL) return NULL;
846
847 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
848 switch (vnp->choice) {
849 case PUB_Gen :
850 cgp = (CitGenPtr) vnp->data.ptrvalue;
851 if (cgp != NULL) {
852 alp = cgp->authors;
853 }
854 break;
855 case PUB_Sub :
856 csp = (CitSubPtr) vnp->data.ptrvalue;
857 if (csp != NULL) {
858 alp = csp->authors;
859 }
860 break;
861 case PUB_Article :
862 cap = (CitArtPtr) vnp->data.ptrvalue;
863 if (cap != NULL) {
864 alp = cap->authors;
865 }
866 break;
867 case PUB_Book :
868 case PUB_Proc :
869 case PUB_Man :
870 cbp = (CitBookPtr) vnp->data.ptrvalue;
871 if (cbp != NULL) {
872 alp = cbp->authors;
873 }
874 break;
875 case PUB_Patent :
876 cpp = (CitPatPtr) vnp->data.ptrvalue;
877 if (cpp != NULL) {
878 alp = cpp->authors;
879 }
880 break;
881 default :
882 break;
883 }
884
885 if (alp != NULL) return alp;
886 }
887
888 return NULL;
889 }
890
/*
 * MakeSingleAuthorString - format one author as
 *     [prefix]name[,initials][ suffix]
 *
 *   format     output format; for EMBL_FMT and EMBLPEPT_FMT every comma
 *              after the prefix is replaced by a blank
 *   prefix     separator written before the name (skipped when it has no text)
 *   name       author name; NULL makes the function return NULL
 *   initials   optional initials, joined to the name with a comma
 *   suffix     optional suffix, joined with a blank
 *   index      if not NULL, the name part is copied to the head of
 *              index->authors
 *   gbref      if not NULL, the name part is copied onto gbref->authors
 *
 * Returns a newly allocated string owned by the caller, or NULL when name
 * is NULL or allocation fails.  The text recorded in index / gbref is the
 * part after the prefix, captured before the EMBL comma replacement.
 */
static CharPtr MakeSingleAuthorString (
  FmtType format,
  CharPtr prefix,
  CharPtr name,
  CharPtr initials,
  CharPtr suffix,
  IndxPtr index,
  GBReferencePtr gbref
)

{
  Char     etal [10];
  CharPtr  namestart;
  CharPtr  result;
  CharPtr  scan;
  size_t   total;
  CharPtr  writer;

  if (name == NULL) return NULL;

  /*
   * A short "et al" / "et,al" entry joined with " and " loses that prefix
   * and is written with a single leading blank instead (kept this way to
   * avoid output differences).
   */

  if (StringLen (name) <= 6 &&
      (StringNICmp (name, "et al", 5) == 0 || StringNICmp (name, "et,al", 5) == 0) &&
      StringCmp (prefix, " and ") == 0) {
    prefix = NULL;
    etal [0] = ' ';
    StringNCpy_0 (etal + 1, name, sizeof (etal) - 1);
    name = etal;
  }

  /* the extra 4 covers the comma, the blank and the terminator */

  total = StringLen (prefix) + StringLen (name) + StringLen (initials) + StringLen (suffix);
  result = MemNew (sizeof (Char) * (total + 4));
  if (result == NULL) return NULL;

  namestart = result;
  if (! StringHasNoText (prefix)) {
    namestart = StringMove (namestart, prefix);
  }

  /* initials and suffix to support structured name fields */

  writer = StringMove (namestart, name);
  if (! StringHasNoText (initials)) {
    writer = StringMove (writer, ",");
    writer = StringMove (writer, initials);
  }
  if (! StringHasNoText (suffix)) {
    writer = StringMove (writer, " ");
    writer = StringMove (writer, suffix);
  }

  /* optionally populate indexes for NCBI internal database */

  if (index != NULL) {
    ValNodeCopyStrToHead (&(index->authors), 0, namestart);
  }

  /* optionally populate gbseq for XML-ized GenBank format */

  if (gbref != NULL) {
    ValNodeCopyStr (&(gbref->authors), 0, namestart);
  }

  /* if embl, remove commas in individual names, starting after prefix */

  if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
    for (scan = namestart; *scan != '\0'; scan++) {
      if (*scan == ',') {
        *scan = ' ';
      }
    }
  }

  return result;
}
986
GetAuthorsString(FmtType format,AuthListPtr alp,CharPtr PNTR consortP,IndxPtr index,GBReferencePtr gbref)987 NLM_EXTERN CharPtr GetAuthorsString (
988 FmtType format,
989 AuthListPtr alp,
990 CharPtr PNTR consortP,
991 IndxPtr index,
992 GBReferencePtr gbref
993 )
994
995 {
996 AuthorPtr ap;
997 ValNodePtr clist;
998 ValNodePtr conslist;
999 Int2 count;
1000 ValNodePtr head = NULL;
1001 ValNodePtr names;
1002 ValNodePtr next;
1003 NameStdPtr nsp;
1004 PersonIdPtr pid;
1005 ValNodePtr pidlist;
1006 CharPtr prefix = NULL;
1007 CharPtr str;
1008 ValNodePtr vnp;
1009
1010 if (alp == NULL) return NULL;
1011
1012 /*
1013 alp = AsnIoMemCopy ((Pointer) alp,
1014 (AsnReadFunc) AuthListAsnRead,
1015 (AsnWriteFunc) AuthListAsnWrite);
1016 if (alp == NULL) return NULL;
1017 */
1018
1019 count = 0;
1020 if (alp->choice == 1) {
1021
1022 pidlist = NULL;
1023 conslist = NULL;
1024
1025 for (names = alp->names; names != NULL; names = names->next) {
1026 ap = (AuthorPtr) names->data.ptrvalue;
1027 if (ap == NULL) continue;
1028 pid = ap->name;
1029 if (pid == NULL) continue;
1030 if (pid->choice == 2 || pid->choice == 3 || pid->choice == 4) {
1031 ValNodeAddPointer (&pidlist, 0, (Pointer) pid);
1032 } else if (pid->choice == 5) {
1033 ValNodeAddPointer (&conslist, 0, (Pointer) pid);
1034 }
1035 }
1036
1037 for (vnp = pidlist; vnp != NULL; vnp = vnp->next) {
1038 next = vnp->next;
1039 if (next == NULL) {
1040 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1041 if (count == 0) {
1042 prefix = NULL;
1043 } else {
1044 prefix = " and ";
1045 }
1046 }
1047 }
1048 str = NULL;
1049 pid = (PersonIdPtr) vnp->data.ptrvalue;
1050 if (pid->choice == 2) {
1051 nsp = (NameStdPtr) pid->data;
1052 if (nsp != NULL) {
1053 if (! StringHasNoText (nsp->names [0])) {
1054 str = MakeSingleAuthorString (format, prefix, nsp->names [0], nsp->names [4], nsp->names [5], index, gbref);
1055 } else if (! StringHasNoText (nsp->names [3])) {
1056 str = MakeSingleAuthorString (format, prefix, nsp->names [3], NULL, NULL, index, gbref);
1057 }
1058 }
1059 } else if (pid->choice == 3 || pid->choice == 4) {
1060 str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, gbref);
1061 }
1062 if (str != NULL) {
1063 ValNodeAddStr (&head, 0, str);
1064 count++;
1065 }
1066 prefix = ", ";
1067 }
1068
1069 prefix = NULL;
1070 clist = NULL;
1071 for (vnp = conslist; vnp != NULL; vnp = vnp->next) {
1072 str = NULL;
1073 pid = (PersonIdPtr) vnp->data.ptrvalue;
1074 if (pid->choice == 5) {
1075 str = MakeSingleAuthorString (format, prefix, (CharPtr) pid->data, NULL, NULL, index, NULL);
1076 if (str != NULL) {
1077 ValNodeAddStr (&clist, 0, str);
1078 }
1079 prefix = "; ";
1080 }
1081 }
1082 if (clist != NULL) {
1083 str = MergeFFValNodeStrs (clist);
1084 if ((! StringHasNoText (str)) && consortP != NULL && *consortP == NULL) {
1085 *consortP = StringSave (str);
1086 }
1087
1088 /* optionally populate gbseq for XML-ized GenBank format */
1089
1090 if (gbref != NULL) {
1091 gbref->consortium = StringSave (str);
1092 }
1093
1094 str = MemFree (str);
1095 ValNodeFreeData (clist);
1096 }
1097
1098 ValNodeFree (pidlist);
1099 ValNodeFree (conslist);
1100
1101 } else if (alp->choice == 2 || alp->choice == 3) {
1102 for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
1103 next = vnp->next;
1104 if (next == NULL) {
1105 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1106 if (count == 0) {
1107 prefix = NULL;
1108 } else {
1109 prefix = " and ";
1110 }
1111 }
1112 }
1113 str = MakeSingleAuthorString (format, prefix, (CharPtr) vnp->data.ptrvalue, NULL, NULL, index, gbref);
1114 if (str != NULL) {
1115 ValNodeAddStr (&head, 0, str);
1116 count++;
1117 }
1118 prefix = ", ";
1119 }
1120 }
1121
1122 str = MergeFFValNodeStrs (head);
1123
1124 ValNodeFreeData (head);
1125
1126 /*
1127 AuthListFree (alp);
1128 */
1129
1130 return str;
1131 }
1132
/*
  Compacts blanks in a string in place.  A run of consecutive blanks
  (spaces and tabs) is collapsed to its first character.  All blanks
  directly after '(' are removed, and so are all blanks directly
  before ')' or ','.
*/
1138
static void StrStripSpaces (
  CharPtr str
)

{
  Char     ch;
  CharPtr  dst;
  CharPtr  src;

  if (str == NULL) return;

  src = str;
  dst = str;
  while ((ch = *src) != '\0') {

    /* every character is copied first; a blank may be taken back below */

    *dst = ch;
    dst++;
    src++;

    if (ch != ' ' && ch != '\t' && ch != '(') continue;

    /* swallow the rest of the blank run that follows */

    while (*src == ' ' || *src == '\t') {
      src++;
    }

    /* a blank just written before ')' or ',' is dropped again;
       a '(' just written is kept, which handles "( )" */

    if ((*src == ')' || *src == ',') && ch != '(') {
      dst--;
    }
  }
  *dst = '\0';
}
1164
AllCaps(CharPtr p)1165 static Boolean AllCaps (
1166 CharPtr p
1167 )
1168
1169 {
1170 if (p == NULL) return FALSE;
1171
1172 for (p++; p != NULL && *p != '\0'; p++) {
1173 if (IS_LOWER (*p)) return FALSE;
1174 }
1175 return TRUE;
1176 }
1177
/*
  CleanEquals rewrites a string in place, turning every double quote
  into a single quote.  A NULL argument is ignored.
*/

static void CleanEquals (
  CharPtr p
)

{
  if (p == NULL) return;

  while (*p != '\0') {
    if (*p == '\"') {
      *p = '\'';
    }
    p++;
  }
}
1191
GetPubTitle(FmtType format,PubdescPtr pdp,CitSubPtr csp)1192 static CharPtr GetPubTitle (
1193 FmtType format,
1194 PubdescPtr pdp,
1195 CitSubPtr csp
1196 )
1197
1198 {
1199 CitArtPtr cap;
1200 CitBookPtr cbp;
1201 CitGenPtr cgp;
1202 Char ch;
1203 CitPatPtr cpp;
1204 MedlineEntryPtr mep;
1205 CharPtr ptr;
1206 CharPtr title = NULL;
1207 ValNodePtr ttl = NULL;
1208 ValNodePtr vnp;
1209
1210 if (csp != NULL) {
1211 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1212 title = "Direct Submission";
1213 return StringSave (title);
1214 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1215 return NULL;
1216 }
1217 }
1218 if (pdp == NULL) return NULL;
1219
1220 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
1221 switch (vnp->choice) {
1222 case PUB_Gen :
1223 cgp = (CitGenPtr) vnp->data.ptrvalue;
1224 if (cgp != NULL) {
1225 if (! StringHasNoText (cgp->title)) return StringSave (cgp->title);
1226 if (! StringHasNoText (cgp->cit)) {
1227 ptr = StringStr (cgp->cit, "Title=\"");
1228 if (ptr != NULL) {
1229 title = StringSave (ptr + 7);
1230 for (ptr = title; *ptr != '\0'; ptr++) {
1231 if (*ptr == '"') {
1232 *ptr = '\0';
1233 break;
1234 }
1235 }
1236 return title;
1237 }
1238 }
1239 }
1240 break;
1241 case PUB_Sub :
1242 csp = (CitSubPtr) vnp->data.ptrvalue;
1243 if (csp != NULL) {
1244 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1245 title = "Direct Submission";
1246 return StringSave (title);
1247 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1248 return NULL;
1249 }
1250 }
1251 break;
1252 case PUB_Medline :
1253 mep = (MedlineEntryPtr) vnp->data.ptrvalue;
1254 if (mep != NULL) {
1255 cap = mep->cit;
1256 if (cap != NULL) {
1257 ttl = cap->title;
1258 }
1259 }
1260 break;
1261 case PUB_Article :
1262 cap = (CitArtPtr) vnp->data.ptrvalue;
1263 if (cap != NULL) {
1264 ttl = cap->title;
1265 }
1266 break;
1267 /* case PUB_Book : */
1268 case PUB_Proc :
1269 case PUB_Man :
1270 cbp = (CitBookPtr) vnp->data.ptrvalue;
1271 if (cbp != NULL) {
1272 ttl = cbp->title;
1273 if (ttl != NULL) {
1274 title = (CharPtr) ttl->data.ptrvalue;
1275 if (! StringHasNoText (title)) {
1276 title = StringSave (title);
1277 if (StringLen (title) > 3) {
1278 ch = *title;
1279 if (IS_LOWER (ch)) {
1280 *title = TO_UPPER (ch);
1281 }
1282 ptr = title;
1283 if (AllCaps (ptr)) {
1284 for (ptr++; ptr != NULL && *ptr != '\0'; ptr++) {
1285 ch = *ptr;
1286 *ptr = TO_LOWER (ch);
1287 }
1288 }
1289 }
1290 return title;
1291 }
1292 }
1293 }
1294 break;
1295 case PUB_Patent :
1296 cpp = (CitPatPtr) vnp->data.ptrvalue;
1297 if (cpp != NULL) {
1298 title = cpp->title;
1299 if (! StringHasNoText (title)) {
1300 return StringSave (title);
1301 }
1302 }
1303 break;
1304 default :
1305 break;
1306 }
1307
1308 if (ttl != NULL) {
1309 title = (CharPtr) ttl->data.ptrvalue;
1310 if (! StringHasNoText (title)) {
1311 return StringSave (title);
1312 }
1313 }
1314 }
1315
1316 return NULL;
1317 }
1318
/*
  CleanPubTitle tidies a publication title in place:
    - double quotes become single quotes (via CleanEquals);
    - trailing spaces are removed, but never at or before index 2;
    - one trailing period is then removed, provided it sits beyond
      index 5 and is not the end of a "..." ellipsis.
  A NULL title is ignored.

  Positions are tracked as indices rather than pointers so that short
  or empty titles never form a pointer outside the string.
*/

static void CleanPubTitle (
  CharPtr title
)

{
  size_t  last;
  size_t  len;

  if (title == NULL) return;

  CleanEquals (title);

  len = StringLen (title);
  while (len > 3) {
    last = len - 1;
    if (title [last] == ' ') {
      title [last] = '\0';
      len--;
      continue;
    }
    if (title [last] == '.') {
      /* drop the period unless the two characters before it are also periods */
      if (last > 5 && (title [last - 1] != '.' || title [last - 2] != '.')) {
        title [last] = '\0';
      }
    }
    break;
  }
}
1350
1351 /*
1352 medline type page numbering is expanded (e.g., 125-35 -> 125-135,
1353 F124-34 -> F124-F134, 12a-c -> 12a-12c).
1354 If only one page is given, this is output without a dash.
1355 Expanded numbering is validated to ensure that the
1356 first number is smaller than or equal to the second and
1357 that the first letter is less than or identical to the second
1358 (i.e., a < c). If the input is all letters (i.e., roman numerals)
1359 this is not validated.
1360
1361 Return values:
1362 0 : valid page numbering.
1363 -1 : invalid page numbering.
1364 */
1365
1366 #define MAX_PAGE_DIGITS 12
1367
FixPages(CharPtr out_pages,CharPtr in_pages)1368 static Int2 FixPages (
1369 CharPtr out_pages,
1370 CharPtr in_pages
1371 )
1372
1373 {
1374 Boolean dash=TRUE, first_alpha;
1375 Char firstbegin[MAX_PAGE_DIGITS];
1376 Char secondbegin[MAX_PAGE_DIGITS];
1377 Char firstend[MAX_PAGE_DIGITS];
1378 Char secondend[MAX_PAGE_DIGITS];
1379 Char temp[MAX_PAGE_DIGITS];
1380 CharPtr alphabegin, numbegin = NULL, alphaend, numend = NULL, ptr, in=in_pages;
1381 Int2 diff, index, retval=0;
1382 Int2 length_nb, length_ab, length_ne, length_ae;
1383 Int4 num1=0, num2=0;
1384
1385 if (in_pages == NULL) return retval;
1386
1387 while (*in != '\0')
1388 { /* Check for digits in input*/
1389 if (IS_DIGIT(*in))
1390 break;
1391 in++;
1392 }
1393
1394 if (*in == '\0' || (in != in_pages && *(in-1) == ' '))
1395 { /* if all letters (i.e. roman numerals), put out. */
1396 out_pages = StringCpy(out_pages, in_pages);
1397 return retval;
1398 }
1399
1400 in = in_pages;
1401 if (IS_DIGIT(*in))
1402 { /* Do digits come first? */
1403 first_alpha = FALSE;
1404 index=0;
1405 while (IS_DIGIT(*in) || *in == ' ')
1406 {
1407 firstbegin[index] = *in;
1408 if (*in != ' ')
1409 index++;
1410 in++;
1411 if (*in == '-')
1412 break;
1413
1414 }
1415 firstbegin[index] = '\0';
1416 index=0;
1417 if (*in != '-')
1418 { /* After digits look for letters. */
1419 while (IS_ALPHA(*in) || *in == ' ')
1420 {
1421 secondbegin[index] = *in;
1422 index++;
1423 in++;
1424 if (*in == '-')
1425 break;
1426 }
1427 }
1428 secondbegin[index] = '\0';
1429 if (*in == '-') /* if dash is not present, note */
1430 in++;
1431 else
1432 dash=FALSE;
1433 index=0;
1434 while (IS_DIGIT(*in) || *in == ' ')
1435 { /* Look for digits. */
1436 firstend[index] = *in;
1437 if (*in != ' ')
1438 index++;
1439 in++;
1440 }
1441 firstend[index] = '\0';
1442 index=0;
1443 if (*in != '\0')
1444 { /* Look for letters again. */
1445 while (IS_ALPHA(*in) || *in == ' ')
1446 {
1447 secondend[index] = *in;
1448 index++;
1449 in++;
1450 }
1451 }
1452 secondend[index] = '\0';
1453 }
1454 else
1455 { /* Do letters come first? */
1456 first_alpha = TRUE;
1457 index=0;
1458 while (IS_ALPHA(*in) || *in == ' ')
1459 {
1460 firstbegin[index] = *in;
1461 index++;
1462 in++;
1463 if (*in == '-')
1464 break;
1465 }
1466 firstbegin[index] = '\0';
1467 index=0;
1468 if (*in != '-')
1469 { /* After letters look for digits. */
1470 while (IS_DIGIT(*in) || *in == ' ')
1471 {
1472 secondbegin[index] = *in;
1473 if (*in != ' ')
1474 index++;
1475 in++;
1476 if (*in == '-')
1477 break;
1478 }
1479 }
1480 secondbegin[index] = '\0';
1481 if (*in == '-') /* Note if dash is missing. */
1482 in++;
1483 else
1484 dash=FALSE;
1485 index=0;
1486 while (IS_ALPHA(*in) || *in == ' ')
1487 { /* Look for letters again. */
1488 firstend[index] = *in;
1489 index++;
1490 in++;
1491 }
1492 firstend[index] = '\0';
1493 index=0;
1494 if (*in != '\0')
1495 { /* Any digits here? */
1496 while (IS_DIGIT(*in) || *in == ' ')
1497 {
1498 secondend[index] = *in;
1499 if (*in != ' ')
1500 index++;
1501 in++;
1502 }
1503 }
1504 secondend[index] = '\0';
1505 }
1506
1507 if (first_alpha)
1508 {
1509 alphabegin = firstbegin;
1510 numbegin = secondbegin;
1511 alphaend = firstend;
1512 numend = secondend;
1513 }
1514 else
1515 {
1516 numbegin = firstbegin;
1517 alphabegin = secondbegin;
1518 numend = firstend;
1519 alphaend = secondend;
1520 }
1521
1522 length_nb = StringLen(numbegin);
1523 length_ab = StringLen(alphabegin);
1524 length_ne = StringLen(numend);
1525 length_ae = StringLen(alphaend);
1526
1527 /* If no dash, but second letters or numbers present, reject. */
1528 if (dash == FALSE)
1529 {
1530 if (length_ne != 0 || length_ae != 0)
1531 retval = -1;
1532 }
1533 /* Check for situations like "AAA-123" or "222-ABC". */
1534 if (dash == TRUE)
1535 {
1536 if (length_ne == 0 && length_ab == 0)
1537 retval = -1;
1538 else if (length_ae == 0 && length_nb == 0)
1539 retval = -1;
1540 }
1541
1542 /* The following expands "F502-512" into "F502-F512" and
1543 checks, for entries like "12a-12c" that a > c. "12aa-12ab",
1544 "125G-137A", "125-G137" would be rejected. */
1545 if (retval == 0)
1546 {
1547 if (length_ab > 0)
1548 {
1549 if (length_ae > 0)
1550 {
1551 if (StringCmp(alphabegin, alphaend) != 0)
1552 {
1553 if (length_ab != 1 || length_ae != 1)
1554 retval = -1;
1555 else if (*alphabegin > *alphaend)
1556 retval = -1;
1557 }
1558 }
1559 else
1560 {
1561 alphaend = alphabegin;
1562 length_ae = length_ab;
1563 }
1564 }
1565 else if (length_ae > 0)
1566 retval = -1;
1567 }
1568
1569 /* The following expands "125-37" into "125-137". */
1570 if (retval == 0)
1571 {
1572 if (length_nb > 0)
1573 {
1574 if (length_ne > 0)
1575 {
1576 diff = length_nb - length_ne;
1577 if (diff > 0)
1578 {
1579 index=0;
1580 while (numend[index] != '\0')
1581 {
1582 temp[index+diff] = numend[index];
1583 index++;
1584 }
1585 temp[index+diff] = numend[index];
1586 for (index=0; index<diff; index++)
1587 temp[index] = numbegin[index];
1588 index=0;
1589 while (temp[index] != '\0')
1590 {
1591 numend[index] = temp[index];
1592 index++;
1593 }
1594 numend[index] = temp[index];
1595 }
1596 }
1597 else
1598 {
1599 numend = numbegin;
1600 length_ne = length_nb;
1601 }
1602
1603 }
1604 else if (length_ne > 0)
1605 retval = -1;
1606 /* Check that the first number is <= the second (expanded) number. */
1607 if (retval == 0)
1608 {
1609 /* sscanf(numbegin, "%ld", &num_type);
1610 num1 = (Int4) num_type;
1611 sscanf( numend, "%ld", &num_type);
1612 num2 = (Int4) num_type;
1613 */
1614 num1 = (Int4) atol(numbegin);
1615 num2 = (Int4) atol(numend);
1616 if (num2 < num1)
1617 retval = -1;
1618 }
1619 }
1620
1621 if (retval == -1)
1622 {
1623 out_pages = StringCpy(out_pages, in_pages);
1624 }
1625 else
1626 {
1627 ptr = out_pages;
1628 /* Place expanded and validated page numbers into "out_pages". */
1629 if (first_alpha)
1630 {
1631 while (*alphabegin != '\0')
1632 {
1633 *ptr = *alphabegin;
1634 alphabegin++;
1635 ptr++;
1636 }
1637 while (*numbegin != '\0')
1638 {
1639 *ptr = *numbegin;
1640 numbegin++;
1641 ptr++;
1642 }
1643 if (dash == TRUE)
1644 {
1645 *ptr = '-';
1646 ptr++;
1647 while (*alphaend != '\0')
1648 {
1649 *ptr = *alphaend;
1650 alphaend++;
1651 ptr++;
1652 }
1653 while (*numend != '\0')
1654 {
1655 *ptr = *numend;
1656 numend++;
1657 ptr++;
1658 }
1659 }
1660 *ptr = '\0';
1661 }
1662 else
1663 {
1664 while (*numbegin != '\0')
1665 {
1666 *ptr = *numbegin;
1667 numbegin++;
1668 ptr++;
1669 }
1670 while (*alphabegin != '\0')
1671 {
1672 *ptr = *alphabegin;
1673 alphabegin++;
1674 ptr++;
1675 }
1676 if (dash == TRUE)
1677 {
1678 *ptr = '-';
1679 ptr++;
1680 while (*numend != '\0')
1681 {
1682 *ptr = *numend;
1683 numend++;
1684 ptr++;
1685 }
1686 while (*alphaend != '\0')
1687 {
1688 *ptr = *alphaend;
1689 alphaend++;
1690 ptr++;
1691 }
1692 }
1693 *ptr = '\0';
1694 }
1695 }
1696 return retval;
1697 }
1698
1699 /* !!! still need to add StripParanthesis equivalent !!! */
1700
/*
  DoSup appends one string node to *head that carries the supplement
  and issue information of a citation:
    " (part_sup)"          when part_sup has text, followed by
    " (issue part_supi)"   when issue or part_supi has text.
  Inside the second group part_supi is always preceded by a space,
  even when issue is empty.  If neither group applies, an empty string
  node is appended.  Nothing is appended if the work buffer cannot be
  allocated.
*/

static void DoSup (
  ValNodePtr PNTR head,
  CharPtr issue,
  CharPtr part_sup,
  CharPtr part_supi
)

{
  CharPtr  buf;
  CharPtr  cursor;
  size_t   needed;

  /* 30 extra characters cover the fixed punctuation and the terminator */

  needed = StringLen (issue) + StringLen (part_sup) + StringLen (part_supi) + 30;
  buf = MemNew (sizeof (Char) * needed);
  if (buf == NULL) return;
  cursor = buf;

  if (! StringHasNoText (part_sup)) {
    cursor = StringMove (cursor, " (");
    cursor = StringMove (cursor, part_sup);
    cursor = StringMove (cursor, ")");
  }

  if (! (StringHasNoText (issue) && StringHasNoText (part_supi))) {
    cursor = StringMove (cursor, " (");
    if (! StringHasNoText (issue)) {
      cursor = StringMove (cursor, issue);
    }
    if (! StringHasNoText (part_supi)) {
      cursor = StringMove (cursor, " ");
      cursor = StringMove (cursor, part_supi);
    }
    cursor = StringMove (cursor, ")");
  }

  ValNodeCopyStr (head, 0, buf);
  MemFree (buf);
}
1749
FormatCitJour(FmtType format,Boolean citArtIsoJta,CitJourPtr cjp)1750 static CharPtr FormatCitJour (
1751 FmtType format,
1752 Boolean citArtIsoJta,
1753 CitJourPtr cjp
1754 )
1755
1756 {
1757 Char buf [256];
1758 DatePtr dp;
1759 Boolean electronic_journal = FALSE;
1760 ValNodePtr head = NULL;
1761 ImprintPtr imp;
1762 CharPtr issue = NULL;
1763 Char pages [128];
1764 CharPtr part_sup = NULL;
1765 CharPtr part_supi = NULL;
1766 CharPtr rsult = NULL;
1767 CharPtr title = NULL;
1768 ValNodePtr ttl;
1769 CharPtr volume;
1770 Char year [8];
1771
1772 if (cjp == NULL) return NULL;
1773
1774 ttl = cjp->title;
1775 if (ttl == NULL) return NULL;
1776
1777 /* always use iso_jta title if present */
1778
1779 while (ttl != NULL && ttl->choice != Cit_title_iso_jta) {
1780 ttl = ttl->next;
1781 }
1782
1783 imp = cjp->imp;
1784 if (imp == NULL) return NULL;
1785
1786 /* release mode requires iso_jta title */
1787
1788 if (imp->pubstatus == 3 || imp->pubstatus == 10) {
1789 electronic_journal = TRUE;
1790 }
1791
1792 if (ttl == NULL) {
1793 ttl = cjp->title;
1794 if (ttl != NULL && ttl->choice == Cit_title_name) {
1795 title = (CharPtr) ttl->data.ptrvalue;
1796 if (title != NULL && StringNCmp (title, "(er)", 4) == 0) {
1797 electronic_journal = TRUE;
1798 }
1799 }
1800 if (citArtIsoJta && (! electronic_journal)) return NULL;
1801 }
1802
1803 dp = imp->date;
1804 year [0] = '\0';
1805 if (dp != NULL) {
1806 if (dp->data [0] == 1) {
1807 if (dp->data [1] != 0) {
1808 sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
1809 }
1810 } else if (StringDoesHaveText (dp->str) && StringCmp (dp->str, "?") != 0) {
1811 StringCpy (year, " (");
1812 StringNCat (year, dp->str, 4);
1813 StringCat (year, ")");
1814 }
1815 }
1816
1817 if (imp->prepub == 1 || imp->prepub == 255) {
1818 sprintf (buf, "Unpublished %s", year);
1819 return StringSave (buf);
1820 }
1821
1822 if (ttl != NULL) {
1823 title = (CharPtr) ttl->data.ptrvalue;
1824 }
1825 if (StringLen (title) < 3) return StringSave (".");
1826
1827 /*
1828 if (imp->pubstatus == 3 || imp->pubstatus == 10) {
1829 ValNodeCopyStr (&head, 0, "(er) ");
1830 }
1831 */
1832
1833 ValNodeCopyStr (&head, 0, title);
1834
1835 volume = imp->volume;
1836 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1837 issue = imp->issue;
1838 part_sup = imp->part_sup;
1839 part_supi = imp->part_supi;
1840 }
1841 pages [0] = '\0';
1842 if (electronic_journal) {
1843 StringNCpy_0 (pages, imp->pages, sizeof (pages));
1844 } else {
1845 FixPages (pages, imp->pages);
1846 }
1847
1848 if (! StringHasNoText (volume)) {
1849 AddValNodeString (&head, " ", volume, NULL);
1850 }
1851
1852 if ((! StringHasNoText (volume)) || (! StringHasNoText (pages))) {
1853 DoSup (&head, issue, part_sup, part_supi);
1854 }
1855
1856 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1857 if (! StringHasNoText (pages)) {
1858 AddValNodeString (&head, ", ", pages, NULL);
1859 }
1860 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1861 if (! StringHasNoText (pages)) {
1862 AddValNodeString (&head, ":", pages, NULL);
1863 } else if (imp->prepub == 2 || (StringHasNoText (volume))) {
1864 ValNodeCopyStr (&head, 0, " 0:0-0");
1865 }
1866 }
1867
1868 ValNodeCopyStr (&head, 0, year);
1869
1870 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1871 if (imp->prepub == 2) {
1872 ValNodeCopyStr (&head, 0, " In press");
1873 } else if (imp->pubstatus == 10 && StringHasNoText (pages)) {
1874 ValNodeCopyStr (&head, 0, " In press");
1875 }
1876 }
1877
1878 rsult = MergeFFValNodeStrs (head);
1879 ValNodeFreeData (head);
1880
1881 return rsult;
1882 }
1883
MakeAffilStr(AffilPtr afp)1884 static CharPtr MakeAffilStr (
1885 AffilPtr afp
1886 )
1887
1888 {
1889 ValNodePtr head = NULL;
1890 CharPtr prefix = "";
1891 CharPtr rsult = NULL;
1892
1893 if (afp == NULL) return NULL;
1894
1895 if (! StringHasNoText (afp->affil)) {
1896 ValNodeCopyStr (&head, 0, afp->affil);
1897 prefix = ", ";
1898 }
1899
1900 if (afp->choice == 2) {
1901 if (! StringHasNoText (afp->div)) {
1902 AddValNodeString (&head, prefix, afp->div, NULL);
1903 prefix = ", ";
1904 }
1905 if (! StringHasNoText (afp->street)) {
1906 AddValNodeString (&head, prefix, afp->street, NULL);
1907 prefix = ", ";
1908 }
1909 if (! StringHasNoText (afp->city)) {
1910 AddValNodeString (&head, prefix, afp->city, NULL);
1911 prefix = ", ";
1912 }
1913 if (! StringHasNoText (afp->sub)) {
1914 AddValNodeString (&head, prefix, afp->sub, NULL);
1915 prefix = ", ";
1916 }
1917 if (! StringHasNoText (afp->country)) {
1918 AddValNodeString (&head, prefix, afp->country, NULL);
1919 prefix = ", ";
1920 }
1921 }
1922
1923 rsult = MergeFFValNodeStrs (head);
1924 ValNodeFreeData (head);
1925
1926 return rsult;
1927 }
1928
GetAffil(AffilPtr afp)1929 static CharPtr GetAffil (
1930 AffilPtr afp
1931 )
1932
1933 {
1934 Boolean need_comma=FALSE;
1935 CharPtr string=NULL, temp, ptr;
1936 Char ch;
1937 Int2 aflen=15;
1938
1939 if (afp == NULL) return NULL;
1940 if (afp) {
1941 if (afp -> choice == 1){
1942 if (afp -> affil){
1943 aflen += StringLen(afp -> affil);
1944 }
1945 }else if (afp -> choice == 2){
1946 aflen += StringLen (afp -> affil) +
1947 StringLen (afp -> div) +
1948 StringLen (afp -> city) +
1949 StringLen (afp -> sub) +
1950 StringLen (afp -> street) +
1951 StringLen (afp -> country) + StringLen(afp->postal_code);
1952 }
1953
1954 temp = string = MemNew(aflen);
1955
1956 if ( afp -> choice == 1){
1957 if (afp -> affil){
1958 ptr = afp->affil;
1959 while ((*temp = *ptr) != '\0')
1960 {
1961 temp++; ptr++;
1962 }
1963 }
1964 }else if (afp -> choice == 2){
1965
1966 if( afp -> div) {
1967 if (need_comma)
1968 {
1969 *temp = ','; temp++;
1970 *temp = ' '; temp++;
1971 }
1972 ptr = afp->div;
1973 while ((*temp = *ptr) != '\0')
1974 {
1975 temp++; ptr++;
1976 }
1977 need_comma = TRUE;
1978 }
1979
1980 if(afp -> affil) {
1981 if (need_comma)
1982 {
1983 *temp = ','; temp++;
1984 *temp = ' '; temp++;
1985 }
1986 ptr = afp->affil;
1987 while ((*temp = *ptr) != '\0')
1988 {
1989 temp++; ptr++;
1990 }
1991 need_comma = TRUE;
1992 }
1993
1994 if(afp -> street) {
1995 if (need_comma)
1996 {
1997 *temp = ','; temp++;
1998 *temp = ' '; temp++;
1999 }
2000 ptr = afp->street;
2001 while ((*temp = *ptr) != '\0')
2002 {
2003 temp++; ptr++;
2004 }
2005 need_comma = TRUE;
2006 }
2007
2008 if( afp -> city) {
2009 if (need_comma)
2010 {
2011 *temp = ','; temp++;
2012 *temp = ' '; temp++;
2013 }
2014 ptr = afp->city;
2015 while ((*temp = *ptr) != '\0')
2016 {
2017 temp++; ptr++;
2018 }
2019 need_comma = TRUE;
2020 }
2021
2022 if( afp -> sub) {
2023 if (need_comma)
2024 {
2025 *temp = ','; temp++;
2026 *temp = ' '; temp++;
2027 }
2028 ptr = afp->sub;
2029 while ((*temp = *ptr) != '\0')
2030 {
2031 temp++; ptr++;
2032 }
2033 need_comma = TRUE;
2034 }
2035
2036 if( afp -> postal_code){
2037 *temp = ' ';
2038 temp++;
2039 ptr = afp->postal_code;
2040 while ((*temp = *ptr) != '\0')
2041 {
2042 temp++; ptr++;
2043 }
2044 }
2045
2046 if( afp -> country){
2047 if (need_comma)
2048 {
2049 *temp = ','; temp++;
2050 *temp = ' '; temp++;
2051 }
2052 ptr = afp->country;
2053 while ((*temp = *ptr) != '\0')
2054 {
2055 temp++; ptr++;
2056 }
2057 need_comma = TRUE;
2058 }
2059 }
2060 temp++;
2061 *temp = '\0';
2062 }
2063
2064 /* convert double quotes to single quotes */
2065
2066 ptr = string;
2067 ch = *ptr;
2068 while (ch != '\0') {
2069 if (ch == '\"') {
2070 *ptr = '\'';
2071 }
2072 ptr++;
2073 ch = *ptr;
2074 }
2075
2076 return string;
2077 }
2078
GetFlatFileAffilString(AffilPtr afp)2079 NLM_EXTERN CharPtr GetFlatFileAffilString (AffilPtr afp)
2080 {
2081 return GetAffil (afp);
2082 }
2083
2084
FormatCitBookArt(FmtType format,CitBookPtr cbp)2085 static CharPtr FormatCitBookArt (
2086 FmtType format,
2087 CitBookPtr cbp
2088 )
2089
2090 {
2091 AffilPtr afp;
2092 AuthListPtr alp;
2093 CharPtr book_title = NULL;
2094 Char buf [256];
2095 Char ch;
2096 DatePtr dp;
2097 ValNodePtr head = NULL;
2098 ImprintPtr imp;
2099 CharPtr issue = NULL;
2100 ValNodePtr names = NULL;
2101 Char pages [128];
2102 CharPtr part_sup = NULL;
2103 CharPtr part_supi = NULL;
2104 CharPtr rsult = NULL;
2105 CharPtr str;
2106 CharPtr title;
2107 ValNodePtr ttl;
2108 ValNodePtr vnp;
2109 CharPtr volume;
2110 Char year [8];
2111
2112 if (cbp == NULL) return NULL;
2113
2114 ttl = cbp->title;
2115 if (ttl == NULL) return NULL;
2116
2117 imp = cbp->imp;
2118 if (imp == NULL) return NULL;
2119
2120 dp = imp->date;
2121 year [0] = '\0';
2122 if (dp != NULL) {
2123 if (dp->data [0] == 1) {
2124 if (dp->data [1] != 0) {
2125 sprintf (year, "(%ld)", (long) (1900 + dp->data [1]));
2126 }
2127 } else {
2128 StringCpy (year, "(");
2129 StringNCat (year, dp->str, 4);
2130 StringNCat (year, ")", 1);
2131 }
2132 }
2133
2134 if (imp->prepub == 1 || imp->prepub == 255) {
2135 sprintf (buf, "Unpublished %s", year);
2136 return StringSave (buf);
2137 }
2138
2139 title = (CharPtr) ttl->data.ptrvalue;
2140 if (StringLen (title) < 3) return StringSave (".");
2141
2142 ValNodeCopyStr (&head, 0, "(in) ");
2143
2144 alp = cbp->authors;
2145 if (alp != NULL) {
2146 str = GetAuthorsString (format, alp, NULL, NULL, NULL);
2147 if (str != NULL) {
2148 ValNodeCopyStr (&head, 0, str);
2149 names = alp->names;
2150 if (names != NULL) {
2151 if (names->next != NULL) {
2152 ValNodeCopyStr (&head, 0, " (Eds.);");
2153 } else {
2154 ValNodeCopyStr (&head, 0, " (Ed.);");
2155 }
2156 }
2157 ValNodeCopyStr (&head, 0, "\n");
2158 }
2159 MemFree (str);
2160 }
2161
2162 book_title = StringSaveNoNull (title);
2163 vnp = ValNodeAddStr (&head, 0, book_title);
2164 if (book_title != NULL) {
2165
2166 /* make book title all caps */
2167
2168 title = book_title;
2169 ch = *title;
2170 while (ch != '\0') {
2171 *title = TO_UPPER (ch);
2172 title++;
2173 ch = *title;
2174 }
2175 }
2176
2177 volume = imp->volume;
2178 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2179 issue = imp->issue;
2180 part_sup = imp->part_sup;
2181 part_supi = imp->part_supi;
2182 }
2183 pages [0] = '\0';
2184 FixPages (pages, imp->pages);
2185
2186 if ((! StringHasNoText (volume)) && (StringCmp (volume, "0") != 0)) {
2187 AddValNodeString (&head, ", Vol. ", volume, NULL);
2188 DoSup (&head, issue, part_sup, part_supi);
2189 }
2190
2191 if (! StringHasNoText (pages)) {
2192 AddValNodeString (&head, ": ", pages, NULL);
2193 }
2194
2195 if (book_title != NULL) {
2196 ValNodeCopyStr (&head, 0, ";\n");
2197 }
2198
2199 afp = imp->pub;
2200 if (afp != NULL) {
2201 str = MakeAffilStr (afp);
2202 if (str != NULL) {
2203 ValNodeCopyStr (&head, 0, str);
2204 ValNodeCopyStr (&head, 0, " ");
2205 MemFree (str);
2206 }
2207 }
2208
2209 AddValNodeString (&head, NULL, year, NULL);
2210
2211 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2212 if (imp->prepub == 2) {
2213 ValNodeCopyStr (&head, 0, " In press");
2214 }
2215 }
2216
2217 rsult = MergeFFValNodeStrs (head);
2218 ValNodeFreeData (head);
2219
2220 return rsult;
2221 }
2222
FormatCitBook(FmtType format,CitBookPtr cbp)2223 static CharPtr FormatCitBook (
2224 FmtType format,
2225 CitBookPtr cbp
2226 )
2227
2228 {
2229 AffilPtr afp;
2230 char year[5];
2231 CharPtr bookTitle=NULL;
2232 CharPtr retval = NULL;
2233 CharPtr temp;
2234 DatePtr dp;
2235 ImprintPtr ip;
2236 int aflen = 0;
2237 CharPtr p;
2238 CharPtr affilStr = NULL;
2239
2240 /* Check parameters */
2241
2242 if (cbp == NULL)
2243 return NULL;
2244
2245 if ( cbp -> othertype != 0)
2246 return NULL;
2247
2248 ip = cbp -> imp;
2249
2250 /* Format the year */
2251
2252 dp = ip -> date;
2253 year[0] = '\0';
2254
2255 if ( dp -> data[0] == 1)
2256 sprintf(year,"%ld",(long) ( 1900+dp -> data[1]));
2257 else
2258 {
2259 StringNCpy( (CharPtr) year, (CharPtr) dp -> str, (size_t) 4);
2260 year[4] = '\0';
2261 }
2262
2263 /* Get the book title */
2264
2265 if (cbp->title)
2266 bookTitle = StringSave(cbp -> title -> data.ptrvalue);
2267
2268 /* Get the affiliation length */
2269
2270 if ( ip -> pub){
2271 afp = ip -> pub;
2272 aflen = StringLen(afp -> affil)+ 5;
2273 if ( afp -> choice == 2){
2274 aflen += 3 + StringLen(afp -> div);
2275 aflen += 3 + StringLen(afp -> street);
2276 aflen += 3 + StringLen(afp -> city);
2277 aflen += 3 + StringLen(afp -> sub);
2278 aflen += 3 + StringLen(afp -> country);
2279 }
2280 } else{
2281 aflen = 22;
2282 }
2283 if (ip->prepub == 2)
2284 aflen += 10;
2285
2286 /* Create a Char String big enough to hold */
2287 /* the title, year, and affiliation. */
2288
2289 temp = retval = MemNew( (size_t) (30+StringLen( bookTitle)+StringLen( year) + aflen) );
2290
2291 /* Convert the title to upper case and */
2292 /* add it to the string. */
2293
2294 if (bookTitle != NULL) {
2295 for ( p = bookTitle; *p; p++) {
2296 *p = TO_UPPER(*p);
2297 }
2298 }
2299
2300 /* temp = StringMove(temp, "Book: "); */
2301 temp = StringMove(temp, "(in) ");
2302 temp = StringMove(temp, bookTitle);
2303 temp = StringMove(temp, ".");
2304
2305 /* Add the affiliation to the string */
2306
2307 if ( ip -> pub)
2308 {
2309 afp = ip -> pub;
2310 *temp = ' ';
2311 temp++;
2312 affilStr = MakeAffilStr(afp);
2313 temp = StringMove(temp,affilStr);
2314 }
2315
2316 /* Add the year to the string */
2317
2318 if (year[0] != '\0')
2319 {
2320 if (affilStr != NULL)
2321 temp = StringMove(temp," (");
2322 else
2323 temp = StringMove(temp, "(");
2324 temp = StringMove(temp, year);
2325 temp = StringMove(temp, ")");
2326 }
2327
2328 /* If in press, add note */
2329
2330 if (ip->prepub == 2)
2331 temp = StringMove(temp, ", In press");
2332
2333 /* Clean up and return */
2334
2335 if (bookTitle)
2336 MemFree(bookTitle);
2337
2338 return retval;
2339
2340 }
2341
FormatThesis(FmtType format,CitBookPtr cbp)2342 static CharPtr FormatThesis (
2343 FmtType format,
2344 CitBookPtr cbp
2345 )
2346
2347 {
2348 AffilPtr afp;
2349 Char ch;
2350 DatePtr dp;
2351 ValNodePtr head = NULL;
2352 ImprintPtr imp;
2353 CharPtr ptr;
2354 CharPtr rsult = NULL;
2355 CharPtr str;
2356 CharPtr suffix = NULL;
2357 Char year [8];
2358
2359 if (cbp == NULL) return NULL;
2360 if (cbp->othertype != 2 || cbp->let_type != 3) return NULL;
2361
2362 imp = cbp->imp;
2363 if (imp == NULL) return NULL;
2364
2365 dp = imp->date;
2366 year [0] = '\0';
2367 if (dp != NULL) {
2368 if (dp->data [0] == 1) {
2369 if (dp->data [1] != 0) {
2370 sprintf (year, "%ld", (long) (1900 + dp->data [1]));
2371 }
2372 } else {
2373 StringNCpy (year, dp->str, (size_t) 4);
2374 year [4] = '\0';
2375 }
2376 }
2377
2378 AddValNodeString (&head, "Thesis (", year, ")");
2379
2380 if (imp->prepub == 2) {
2381 suffix = ", In press";
2382 }
2383
2384 str = NULL;
2385 afp = imp->pub;
2386 if (afp != NULL) {
2387 if (afp->choice == 1) {
2388 str = StringSave (afp->affil);
2389 } else if (afp->choice == 2) {
2390 str = MakeAffilStr (afp);
2391 }
2392 }
2393
2394 if (str != NULL) {
2395
2396 /* convert double quotes to single quotes */
2397
2398 ptr = str;
2399 ch = *ptr;
2400 while (ch != '\0') {
2401 if (ch == '\"') {
2402 *ptr = '\'';
2403 }
2404 ptr++;
2405 ch = *ptr;
2406 }
2407 AddValNodeString (&head, " ", str, suffix);
2408 MemFree (str);
2409 }
2410
2411 rsult = MergeFFValNodeStrs (head);
2412 ValNodeFreeData (head);
2413
2414 return rsult;
2415 }
2416
FormatCitArt(FmtType format,Boolean citArtIsoJta,CitArtPtr cap)2417 static CharPtr FormatCitArt (
2418 FmtType format,
2419 Boolean citArtIsoJta,
2420 CitArtPtr cap
2421 )
2422
2423 {
2424 CitBookPtr cbp;
2425 CitJourPtr cjp;
2426 CharPtr rsult = NULL;
2427
2428 if (cap == NULL) return NULL;
2429
2430 switch (cap->from) {
2431 case 1 :
2432 cjp = (CitJourPtr) cap->fromptr;
2433 if (cjp != NULL) {
2434 rsult = FormatCitJour (format, citArtIsoJta, cjp);
2435 }
2436 break;
2437 case 2 :
2438 cbp = (CitBookPtr) cap->fromptr;
2439 if (cbp != NULL) {
2440 rsult = FormatCitBookArt (format, cbp);
2441 }
2442 break;
2443 case 3 :
2444 cbp = (CitBookPtr) cap->fromptr;
2445 if (cbp != NULL) {
2446 rsult = FormatCitBookArt (format, cbp);
2447 }
2448 break;
2449 default :
2450 break;
2451 }
2452
2453 return rsult;
2454 }
2455
FormatCitPat(FmtType format,ModType mode,CitPatPtr cpp,SeqIdPtr seqidp,IntAsn2gbJobPtr ajp)2456 static CharPtr FormatCitPat (
2457 FmtType format,
2458 ModType mode,
2459 CitPatPtr cpp,
2460 SeqIdPtr seqidp,
2461 IntAsn2gbJobPtr ajp
2462 )
2463
2464 {
2465 AffilPtr afp;
2466 AuthListPtr alp;
2467 IdPatPtr cit;
2468 CharPtr consortium = NULL;
2469 Char date [40];
2470 ValNodePtr head = NULL;
2471 Boolean is_us_pre_grant = FALSE;
2472 CharPtr prefix = NULL;
2473 CharPtr rsult = NULL;
2474 SeqIdPtr sip;
2475 CharPtr str;
2476 CharPtr suffix = NULL;
2477 PatentSeqIdPtr psip;
2478 Int4 pat_seqid = 0;
2479 Char buf[10];
2480
2481 if (cpp == NULL) return NULL;
2482
2483 if (StringHasNoText (cpp->number) &&
2484 StringDoesHaveText (cpp->app_number) &&
2485 StringCmp (cpp->country, "US") == 0 &&
2486 mode != RELEASE_MODE) {
2487 for (sip = seqidp; sip != NULL; sip = sip->next) {
2488 if (sip->choice != SEQID_PATENT) continue;
2489 psip = (PatentSeqIdPtr) sip->data.ptrvalue;
2490 if (psip == NULL) continue;
2491 cit = psip->cit;
2492 if (cit == NULL) continue;
2493 if (StringDoesHaveText (cit->app_number)) {
2494 is_us_pre_grant = TRUE;
2495 }
2496 }
2497 }
2498
2499 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2500 if (is_us_pre_grant) {
2501 ValNodeCopyStr (&head, 0, "Pre-Grant Patent: ");
2502 suffix = " ";
2503 } else {
2504 ValNodeCopyStr (&head, 0, "Patent: ");
2505 suffix = " ";
2506 }
2507 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2508 ValNodeCopyStr (&head, 0, "Patent number ");
2509 }
2510
2511 if (! StringHasNoText (cpp->country)) {
2512 AddValNodeString (&head, NULL, cpp->country, suffix);
2513 }
2514
2515 if (! StringHasNoText (cpp->number)) {
2516 if (ajp != NULL && GetWWW (ajp) && StringCmp (cpp->country, "US") == 0) {
2517 ValNodeCopyStr (&head, 0, "<a href=\"");
2518 ValNodeCopyStr (&head, 0, link_uspto);
2519 ValNodeCopyStr (&head, 0, cpp->number);
2520 ValNodeCopyStr (&head, 0, "\">");
2521 ValNodeCopyStr (&head, 0, cpp->number);
2522 ValNodeCopyStr (&head, 0, "</a>");
2523 } else {
2524 ValNodeCopyStr (&head, 0, cpp->number);
2525 }
2526 } else if (! StringHasNoText (cpp->app_number)) {
2527 if (is_us_pre_grant) {
2528 AddValNodeString (&head, NULL, cpp->app_number, NULL);
2529 } else {
2530 AddValNodeString (&head, "(", cpp->app_number, ")");
2531 }
2532 }
2533
2534 if (! StringHasNoText (cpp->doc_type)) {
2535 AddValNodeString (&head, "-", cpp->doc_type, NULL);
2536 }
2537
2538 /* pat_seqid test */
2539
2540 for (sip = seqidp; sip != NULL; sip = sip->next) {
2541 if (sip->choice == SEQID_PATENT) {
2542 psip = (PatentSeqIdPtr) sip -> data.ptrvalue;
2543 if (psip != NULL) {
2544 pat_seqid = psip->seqid;
2545 }
2546 }
2547 }
2548 if (pat_seqid > 0) {
2549 if (format == EMBL_FMT) {
2550 sprintf(buf,"%s%ld%s", "/", (long) pat_seqid, ", ");
2551 ValNodeCopyStr (&head, 0, buf);
2552 } else {
2553 sprintf(buf,"%s%ld ", " ", (long) pat_seqid);
2554 ValNodeCopyStr (&head, 0, buf);
2555 }
2556 } else {
2557 ValNodeCopyStr (&head, 0, " ");
2558 }
2559
2560 /* Date */
2561
2562 date [0] = '\0';
2563 if (cpp->date_issue != NULL) {
2564 DateToFF (date, cpp->date_issue, FALSE);
2565 } else if (cpp->app_date != NULL) {
2566 DateToFF (date, cpp->app_date, FALSE);
2567 }
2568 if (! StringHasNoText (date)) {
2569 ValNodeCopyStr (&head, 0, date);
2570 }
2571
2572 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2573 ValNodeCopyStr (&head, 0, ";");
2574 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2575 ValNodeCopyStr (&head, 0, ".");
2576 }
2577
2578 alp = cpp->authors;
2579 if (alp != NULL) {
2580 afp = alp->affil;
2581 if (afp != NULL) {
2582 suffix = NULL;
2583 if (afp->choice == 2) {
2584 suffix = ";";
2585 }
2586
2587 /* If any of the affiliation fields are */
2588 /* non-blank, put them on a new line. */
2589
2590 if ((! StringHasNoText (afp->affil)) ||
2591 (! StringHasNoText (afp->street)) ||
2592 (! StringHasNoText (afp->div)) ||
2593 (! StringHasNoText (afp->city)) ||
2594 (! StringHasNoText (afp->sub)) ||
2595 (! StringHasNoText (afp->country)))
2596 ValNodeCopyStr (&head, 0, "\n");
2597
2598 /* Write out the affiliation fields */
2599
2600 if (! StringHasNoText (afp->affil)) {
2601 AddValNodeString (&head, NULL, afp->affil, suffix);
2602 prefix = " ";
2603 }
2604 if (! StringHasNoText (afp->street)) {
2605 AddValNodeString (&head, prefix, afp->street, ";");
2606 prefix = " ";
2607 }
2608 if (! StringHasNoText (afp->div)) {
2609 AddValNodeString (&head, prefix, afp->div, ";");
2610 prefix = " ";
2611 }
2612 if (! StringHasNoText (afp->city)) {
2613 AddValNodeString (&head, prefix, afp->city, NULL);
2614 prefix = ", ";
2615 }
2616 if (! StringHasNoText (afp->sub)) {
2617 AddValNodeString (&head, prefix, afp->sub, NULL);
2618 }
2619 if (! StringHasNoText (afp->country)) {
2620 AddValNodeString (&head, ";\n", afp->country, ";");
2621 }
2622 }
2623 }
2624
2625 alp = cpp->assignees;
2626 if (alp != NULL) {
2627 str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
2628 afp = alp->affil;
2629 if (afp != NULL) {
2630 suffix = NULL;
2631 if (afp->choice == 2) {
2632 suffix = ";";
2633 }
2634
2635 /* If any of the affiliation fields are */
2636 /* non-blank, put them on a new line. */
2637
2638 if ((! StringHasNoText (str)) ||
2639 (! StringHasNoText (consortium)) ||
2640 (! StringHasNoText (afp->affil)) ||
2641 (! StringHasNoText (afp->street)) ||
2642 (! StringHasNoText (afp->div)) ||
2643 (! StringHasNoText (afp->city)) ||
2644 (! StringHasNoText (afp->sub)) ||
2645 (! StringHasNoText (afp->country)))
2646 ValNodeCopyStr (&head, 0, "\n");
2647
2648 if (! StringHasNoText (str)) {
2649 AddValNodeString (&head, NULL, str, ";");
2650 prefix = " ";
2651 }
2652 if (! StringHasNoText (consortium)) {
2653 AddValNodeString (&head, NULL, consortium, ";");
2654 prefix = " ";
2655 }
2656
2657 /* Write out the affiliation fields */
2658
2659 if (! StringHasNoText (afp->affil)) {
2660 AddValNodeString (&head, NULL, afp->affil, suffix);
2661 prefix = " ";
2662 }
2663 if (! StringHasNoText (afp->street)) {
2664 AddValNodeString (&head, prefix, afp->street, ";");
2665 prefix = " ";
2666 }
2667 if (! StringHasNoText (afp->div)) {
2668 AddValNodeString (&head, prefix, afp->div, ";");
2669 prefix = " ";
2670 }
2671 if (! StringHasNoText (afp->city)) {
2672 AddValNodeString (&head, prefix, afp->city, NULL);
2673 prefix = ", ";
2674 }
2675 if (! StringHasNoText (afp->sub)) {
2676 AddValNodeString (&head, prefix, afp->sub, NULL);
2677 }
2678 if (! StringHasNoText (afp->country)) {
2679 AddValNodeString (&head, ";\n", afp->country, ";");
2680 }
2681 }
2682 MemFree (consortium);
2683 MemFree (str);
2684 }
2685
2686 rsult = MergeFFValNodeStrs (head);
2687 ValNodeFreeData (head);
2688
2689 /*
2690 s_StringCleanup(rsult);
2691 */
2692
2693 return rsult;
2694 }
2695
FormatCitGen(FmtType format,Boolean dropBadCitGens,Boolean is_ed,Boolean noAffilOnUnpub,CitGenPtr cgp)2696 static CharPtr FormatCitGen (
2697 FmtType format,
2698 Boolean dropBadCitGens,
2699 Boolean is_ed,
2700 Boolean noAffilOnUnpub,
2701 CitGenPtr cgp
2702 )
2703
2704 {
2705 CharPtr affil = NULL;
2706 AuthListPtr alp = NULL;
2707 Char ch;
2708 DatePtr dp;
2709 ValNodePtr head = NULL;
2710 CharPtr inpress = NULL;
2711 CharPtr journal = NULL;
2712 Char pages [128];
2713 CharPtr prefix = NULL;
2714 CharPtr ptr;
2715 CharPtr rsult = NULL;
2716 Char year [8];
2717
2718 if (cgp == NULL) return NULL;
2719
2720 if (cgp->journal == NULL && StringNICmp (cgp->cit, "unpublished", 11) == 0) {
2721 if (noAffilOnUnpub) {
2722
2723 /* !!! temporarily put date in unpublished citation for QA !!! */
2724
2725 if (dropBadCitGens && is_ed) {
2726 year [0] = '\0';
2727 dp = cgp->date;
2728 if (dp != NULL) {
2729 if (dp->data [0] == 1) {
2730 if (dp->data [1] != 0) {
2731 sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
2732 }
2733 } else {
2734 StringCpy (year, " (");
2735 StringNCat (year, dp->str, 4);
2736 StringCat (year, ")");
2737 }
2738 }
2739 AddValNodeString (&head, NULL, "Unpublished", NULL);
2740 AddValNodeString (&head, NULL, year, NULL);
2741 rsult = MergeFFValNodeStrs (head);
2742 ValNodeFreeData (head);
2743 return rsult;
2744 }
2745
2746 /* !!! remove above section once QA against asn2ff is done !!! */
2747
2748 return StringSave ("Unpublished");
2749 }
2750
2751 alp = cgp->authors;
2752 if (alp != NULL) {
2753 affil = GetAffil (alp->affil);
2754 if (! StringHasNoText (affil)) {
2755 rsult = MemNew ((size_t) StringLen (affil) + (size_t) StringLen (cgp->cit) + 15);
2756 StringCpy (rsult, "Unpublished ");
2757 StringCat (rsult, affil);
2758 TrimSpacesAroundString (rsult);
2759 return rsult;
2760 }
2761 }
2762
2763 rsult = StringSave (cgp->cit);
2764 TrimSpacesAroundString (rsult);
2765 return rsult;
2766 }
2767
2768 year [0] = '\0';
2769 dp = cgp->date;
2770 if (dp != NULL) {
2771 if (dp->data [0] == 1) {
2772 if (dp->data [1] != 0) {
2773 sprintf (year, " (%ld)", (long) (1900 + dp->data [1]));
2774 }
2775 } else {
2776 StringCpy (year, " (");
2777 StringNCat (year, dp->str, 4);
2778 StringCat (year, ")");
2779 }
2780 }
2781
2782 pages [0] = '\0';
2783 if (cgp->pages != NULL) {
2784 FixPages (pages, cgp->pages);
2785 }
2786
2787 if (cgp->journal != NULL) {
2788 journal = (CharPtr) cgp->journal->data.ptrvalue;
2789 }
2790 if (cgp->cit != NULL) {
2791 ptr = StringStr (cgp->cit, "Journal=\"");
2792 if (ptr != NULL) {
2793 journal = ptr + 9;
2794 } else if (StringNICmp (cgp->cit, "submitted", 8) == 0 ||
2795 StringNICmp (cgp->cit, "unpublished", 11) == 0) {
2796
2797 if ((! dropBadCitGens) || journal != NULL) {
2798 inpress = cgp->cit;
2799 } else {
2800 inpress = "Unpublished";
2801 }
2802 } else if (StringNICmp (cgp->cit, "Online Publication", 18) == 0 ||
2803 StringNICmp (cgp->cit, "Published Only in DataBase", 26) == 0 ||
2804 StringNICmp (cgp->cit, "In press", 8) == 0 ) {
2805 inpress = cgp->cit;
2806 } else if (StringNICmp (cgp->cit, "(er) ", 5) == 0) {
2807 journal = cgp->cit;
2808 } else if ((! dropBadCitGens) && journal == NULL) {
2809 journal = cgp->cit;
2810 }
2811 }
2812 if (journal != NULL) {
2813 journal = StringSave (journal);
2814 for (ptr = journal, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
2815 if (ch == '=' || ch == '\"') {
2816 *ptr = '\0';
2817 }
2818 }
2819 ValNodeAddStr (&head, 0, journal);
2820 prefix = " ";
2821 }
2822
2823 if (! StringHasNoText (inpress)) {
2824 AddValNodeString (&head, prefix, inpress, NULL);
2825 prefix = " ";
2826 }
2827
2828 if (! StringHasNoText (cgp->volume)) {
2829 AddValNodeString (&head, prefix, cgp->volume, NULL);
2830 }
2831
2832 if (! StringHasNoText (pages)) {
2833 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2834 AddValNodeString (&head, ", ", pages, NULL);
2835 } else if (format == EMBL_FMT) {
2836 AddValNodeString (&head, ":", pages, NULL);
2837 }
2838 }
2839
2840 if (! StringHasNoText (year)) {
2841 AddValNodeString (&head, NULL, year, NULL);
2842 }
2843
2844 rsult = MergeFFValNodeStrs (head);
2845 ValNodeFreeData (head);
2846
2847 return rsult;
2848 }
2849
FormatCitSub(FmtType format,CitSubPtr csp)2850 static CharPtr FormatCitSub (
2851 FmtType format,
2852 CitSubPtr csp
2853 )
2854
2855 {
2856 CharPtr affil;
2857 AffilPtr afp;
2858 AuthListPtr alp;
2859 Char buf [256];
2860 Char date [40];
2861 ValNodePtr head = NULL;
2862 CharPtr rsult = NULL;
2863
2864 if (csp == NULL) return NULL;
2865
2866 date [0] = '\0';
2867 if (csp->date != NULL) {
2868 DateToFF (date, csp->date, TRUE);
2869 }
2870 if (StringHasNoText (date)) {
2871 StringCpy (date, "\?\?-\?\?\?-\?\?\?\?");
2872 }
2873
2874 sprintf (buf, "Submitted (%s)", date);
2875 ValNodeCopyStr (&head, 0, buf);
2876
2877 alp = csp->authors;
2878 if (alp != NULL) {
2879 afp = alp->affil;
2880 if (afp != NULL) {
2881 affil = GetAffil (afp);
2882 Asn2gnbkCompressSpaces (affil);
2883 if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2884 if (StringNCmp(affil, " to the EMBL/GenBank/DDBJ databases.", 36) != 0) {
2885 ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
2886 } else {
2887 ValNodeCopyStr (&head, 0, " ");
2888 }
2889 } else {
2890 ValNodeCopyStr (&head, 0, " ");
2891 }
2892 ValNodeCopyStr (&head, 0, affil);
2893 MemFree (affil);
2894 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
2895 ValNodeCopyStr (&head, 0, " to the EMBL/GenBank/DDBJ databases.\n");
2896 }
2897 }
2898
2899 rsult = MergeFFValNodeStrs (head);
2900 ValNodeFreeData (head);
2901
2902 return rsult;
2903 }
2904
GetPubJournal(FmtType format,ModType mode,Boolean dropBadCitGens,Boolean is_ed,Boolean noAffilOnUnpub,Boolean citArtIsoJta,PubdescPtr pdp,CitSubPtr csp,SeqIdPtr seqidp,IndxPtr index,IntAsn2gbJobPtr ajp)2905 static CharPtr GetPubJournal (
2906 FmtType format,
2907 ModType mode,
2908 Boolean dropBadCitGens,
2909 Boolean is_ed,
2910 Boolean noAffilOnUnpub,
2911 Boolean citArtIsoJta,
2912 PubdescPtr pdp,
2913 CitSubPtr csp,
2914 SeqIdPtr seqidp,
2915 IndxPtr index,
2916 IntAsn2gbJobPtr ajp
2917 )
2918
2919 {
2920 CitArtPtr cap;
2921 CitBookPtr cbp;
2922 CitGenPtr cgp;
2923 CitPatPtr cpp;
2924 CharPtr journal = NULL;
2925 MedlineEntryPtr mep;
2926 ValNodePtr vnp;
2927
2928 if (csp != NULL) {
2929 return FormatCitSub (format, csp);
2930 }
2931 if (pdp == NULL) return NULL;
2932
2933 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
2934 switch (vnp->choice) {
2935 case PUB_Gen :
2936 cgp = (CitGenPtr) vnp->data.ptrvalue;
2937 if (cgp != NULL) {
2938 if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
2939 if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
2940 break; /* skip just serial number */
2941 }
2942 }
2943 journal = FormatCitGen (format, dropBadCitGens, is_ed, noAffilOnUnpub, cgp);
2944 }
2945 break;
2946 case PUB_Sub :
2947 csp = (CitSubPtr) vnp->data.ptrvalue;
2948 if (csp != NULL) {
2949 journal = FormatCitSub (format, csp);
2950 }
2951 break;
2952 case PUB_Medline :
2953 mep = (MedlineEntryPtr) vnp->data.ptrvalue;
2954 if (mep != NULL) {
2955 cap = mep->cit;
2956 if (cap != NULL) {
2957 journal = FormatCitArt (format, citArtIsoJta, cap);
2958 }
2959 }
2960 break;
2961 case PUB_Article :
2962 cap = (CitArtPtr) vnp->data.ptrvalue;
2963 if (cap != NULL) {
2964 journal = FormatCitArt (format, citArtIsoJta, cap);
2965 }
2966 break;
2967 case PUB_Book :
2968 case PUB_Proc :
2969 cbp = (CitBookPtr) vnp->data.ptrvalue;
2970 if (cbp != NULL) {
2971 journal = FormatCitBook (format, cbp);
2972 }
2973 break;
2974 case PUB_Man :
2975 cbp = (CitBookPtr) vnp->data.ptrvalue;
2976 if (cbp != NULL) {
2977 journal = FormatThesis (format, cbp);
2978 }
2979 break;
2980 case PUB_Patent :
2981 cpp = (CitPatPtr) vnp->data.ptrvalue;
2982 if (cpp != NULL) {
2983 journal = FormatCitPat (format, mode, cpp, seqidp, ajp);
2984 }
2985 break;
2986 default :
2987 break;
2988 }
2989
2990 /* optionally populate indexes for NCBI internal database */
2991
2992 if (index != NULL && journal != NULL) {
2993
2994 /* skip non-informative cit-gens */
2995
2996 if (StringNICmp (journal, "submitted", 8) == 0 ||
2997 StringNICmp (journal, "unpublished", 11) == 0 ||
2998 StringNICmp (journal, "Online Publication", 18) == 0 ||
2999 StringNICmp (journal, "Published Only in DataBase", 26) == 0) {
3000 } else {
3001 ValNodeCopyStrToHead (&(index->journals), 0, journal);
3002 }
3003 }
3004
3005 if (journal != NULL) return journal;
3006 }
3007
3008 return NULL;
3009 }
3010
GetMuid(PubdescPtr pdp)3011 static Int4 GetMuid (
3012 PubdescPtr pdp
3013 )
3014
3015 {
3016 ArticleIdPtr aip;
3017 CitArtPtr cap;
3018 MedlineEntryPtr mep;
3019 ValNodePtr vnp;
3020
3021 if (pdp == NULL) return 0;
3022
3023 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3024 switch (vnp->choice) {
3025 case PUB_Medline :
3026 mep = (MedlineEntryPtr) vnp->data.ptrvalue;
3027 if (mep != NULL) {
3028 return mep->uid;
3029 }
3030 break;
3031 case PUB_Muid :
3032 return vnp->data.intvalue;
3033 case PUB_Article:
3034 cap = (CitArtPtr) vnp->data.ptrvalue;
3035 if (cap!= NULL && cap->ids != NULL) {
3036 for (aip = cap->ids; aip != NULL; aip = aip->next) {
3037 if (aip->choice == ARTICLEID_MEDLINE) {
3038 return aip->data.intvalue;
3039 }
3040 }
3041 }
3042 default :
3043 break;
3044 }
3045 }
3046
3047 return 0;
3048 }
3049
GetPmid(PubdescPtr pdp)3050 static Int4 GetPmid (
3051 PubdescPtr pdp
3052 )
3053
3054 {
3055 ArticleIdPtr aip;
3056 CitArtPtr cap;
3057 MedlineEntryPtr mep;
3058 ValNodePtr vnp;
3059
3060 if (pdp == NULL) return 0;
3061
3062 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3063 switch (vnp->choice) {
3064 case PUB_Medline :
3065 mep = (MedlineEntryPtr) vnp->data.ptrvalue;
3066 if (mep != NULL) {
3067 return mep->pmid;
3068 }
3069 break;
3070 case PUB_PMid :
3071 return vnp->data.intvalue;
3072 case PUB_Article:
3073 cap = (CitArtPtr) vnp->data.ptrvalue;
3074 if (cap!= NULL && cap->ids != NULL) {
3075 for (aip = cap->ids; aip != NULL; aip = aip->next) {
3076 if (aip->choice == ARTICLEID_PUBMED) {
3077 return aip->data.intvalue;
3078 }
3079 }
3080 }
3081 default :
3082 break;
3083 }
3084 }
3085
3086 return 0;
3087 }
3088
GetDOI(PubdescPtr pdp)3089 static CharPtr GetDOI (
3090 PubdescPtr pdp
3091 )
3092
3093 {
3094 ArticleIdPtr aip;
3095 CitArtPtr cap;
3096 ValNodePtr vnp;
3097
3098 if (pdp == NULL) return 0;
3099
3100 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3101 switch (vnp->choice) {
3102 case PUB_Article:
3103 cap = (CitArtPtr) vnp->data.ptrvalue;
3104 if (cap!= NULL && cap->ids != NULL) {
3105 for (aip = cap->ids; aip != NULL; aip = aip->next) {
3106 if (aip->choice == ARTICLEID_DOI) {
3107 return (CharPtr) aip->data.ptrvalue;
3108 }
3109 }
3110 }
3111 default :
3112 break;
3113 }
3114 }
3115
3116 return 0;
3117 }
3118
CleanQualValue(CharPtr str)3119 NLM_EXTERN CharPtr CleanQualValue (
3120 CharPtr str
3121 )
3122
3123 {
3124 Char ch;
3125 CharPtr dst;
3126 CharPtr ptr;
3127
3128 if (str == NULL || str [0] == '\0') return NULL;
3129
3130 dst = str;
3131 ptr = str;
3132 ch = *ptr;
3133 while (ch != '\0') {
3134 if (ch == '\n' || ch == '\r' || ch == '\t' || ch == '"') {
3135 *dst = ' ';
3136 dst++;
3137 } else {
3138 *dst = ch;
3139 dst++;
3140 }
3141 ptr++;
3142 ch = *ptr;
3143 }
3144 *dst = '\0';
3145
3146 return str;
3147 }
3148
/* Two adjacent characters packed into one 16-bit value, first character in */
/* the high byte and second in the low byte.  Asn2gnbkCompressSpaces keeps  */
/* a rolling pair of this shape and compares it against these constants.    */

#define CHAR_PAIR(first, second) ((first) << 8 | (second))

#define twocommas       CHAR_PAIR (',', ',')
#define twospaces       CHAR_PAIR (' ', ' ')
#define twosemicolons   CHAR_PAIR (';', ';')
#define space_comma     CHAR_PAIR (' ', ',')
#define space_bracket   CHAR_PAIR (' ', ')')
#define bracket_space   CHAR_PAIR ('(', ' ')
#define space_semicolon CHAR_PAIR (' ', ';')
#define comma_space     CHAR_PAIR (',', ' ')
#define semicolon_space CHAR_PAIR (';', ' ')
3158
Asn2gnbkCompressSpaces(CharPtr str)3159 NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str)
3160
3161 {
3162 Char ch;
3163 Char last;
3164 CharPtr dst;
3165 CharPtr ptr;
3166
3167 Char curr;
3168 Char next;
3169 CharPtr in;
3170 CharPtr out;
3171 Uint2 two_chars;
3172
3173 if (str == NULL || str [0] == '\0') return str;
3174
3175 in = str;
3176 out = str;
3177
3178 curr = *in;
3179 in++;
3180
3181 next = 0;
3182 two_chars = curr;
3183
3184 while (curr != '\0') {
3185 next = *in;
3186 in++;
3187
3188 two_chars = (two_chars << 8) | next;
3189
3190 if (two_chars == twocommas) {
3191 *out++ = curr;
3192 next = ' ';
3193 } else if (two_chars == twospaces) {
3194 } else if (two_chars == twosemicolons) {
3195 } else if (two_chars == bracket_space) {
3196 next = curr;
3197 two_chars = curr;
3198 } else if (two_chars == space_bracket) {
3199 } else if (two_chars == space_comma) {
3200 *out++ = next;
3201 next = curr;
3202 *out++ = ' ';
3203 while (next == ' ' || next == ',') {
3204 next = *in;
3205 in++;
3206 }
3207 two_chars = next;
3208 } else if (two_chars == space_semicolon) {
3209 *out++ = next;
3210 next = curr;
3211 *out++ = ' ';
3212 while (next == ' ' || next == ';') {
3213 next = *in;
3214 in++;
3215 }
3216 two_chars = next;
3217 } else if (two_chars == comma_space) {
3218 *out++ = curr;
3219 *out++ = ' ';
3220 while (next == ' ' || next == ',') {
3221 next = *in;
3222 in++;
3223 }
3224 two_chars = next;
3225 } else if (two_chars == semicolon_space) {
3226 *out++ = curr;
3227 *out++ = ' ';
3228 while (next == ' ' || next == ';') {
3229 next = *in;
3230 in++;
3231 }
3232 two_chars = next;
3233 } else {
3234 *out++ = curr;
3235 }
3236
3237 curr = next;
3238 }
3239
3240 if (curr > 0 && curr != ' ') {
3241 *out = curr;
3242 out++;
3243 }
3244 *out = '\0';
3245
3246 /* TrimSpacesAroundString but allow leading/trailing tabs/newlines */
3247
3248 if (str != NULL && str [0] != '\0') {
3249 last = '\0';
3250 dst = str;
3251 ptr = str;
3252 ch = *ptr;
3253 while (ch != '\0' && ch == ' ') {
3254 ptr++;
3255 ch = *ptr;
3256 }
3257 while (ch != '\0') {
3258 *dst = ch;
3259 dst++;
3260 ptr++;
3261 last = ch;
3262 ch = *ptr;
3263 while (ch != '\0' && last == ' ' && ch == ' ') {
3264 ptr++;
3265 ch = *ptr;
3266 }
3267 }
3268 *dst = '\0';
3269 dst = NULL;
3270 ptr = str;
3271 ch = *ptr;
3272 while (ch != '\0') {
3273 if (ch != ' ') {
3274 dst = NULL;
3275 } else if (dst == NULL) {
3276 dst = ptr;
3277 }
3278 ptr++;
3279 ch = *ptr;
3280 }
3281 if (dst != NULL) {
3282 *dst = '\0';
3283 }
3284 }
3285
3286 return str;
3287 }
3288
StripAllSpaces(CharPtr str)3289 NLM_EXTERN CharPtr StripAllSpaces (
3290 CharPtr str
3291 )
3292
3293 {
3294 Char ch;
3295 CharPtr dst;
3296 CharPtr ptr;
3297
3298 if (str == NULL || str [0] == '\0') return NULL;
3299
3300 dst = str;
3301 ptr = str;
3302 ch = *ptr;
3303 while (ch != '\0') {
3304 if (ch == ' ' || ch == '\t') {
3305 } else {
3306 *dst = ch;
3307 dst++;
3308 }
3309 ptr++;
3310 ch = *ptr;
3311 }
3312 *dst = '\0';
3313
3314 return str;
3315 }
3316
3317 static CharPtr remarksText [] = {
3318 "full automatic", "full staff_review", "full staff_entry",
3319 "simple staff_review", "simple staff_entry", "simple automatic",
3320 "unannotated automatic", "unannotated staff_review", "unannotated staff_entry",
3321 NULL
3322 };
3323
/*
 * AddReferenceToGbseq
 *
 * Links gbref at the head of gbseq->references and fills in the fields
 * that must not be left empty:
 *
 *   reference  the serial number of rbp when str looks like a GenBank
 *              REFERENCE block, otherwise "?"
 *   journal    "?" when not already set
 *   position   "sites" for rbp->sites 1 or 2, nothing for 3, otherwise
 *              the 1-based intervals of the reference location joined by
 *              "; ", or "1..<length of bsp>" when there is no location
 *
 * Does nothing when any pointer argument is NULL or str is blank.
 */
static void AddReferenceToGbseq (
  GBSeqPtr gbseq,
  GBReferencePtr gbref,
  CharPtr str,
  RefBlockPtr rbp,
  BioseqPtr bsp
)

{
  Char            buf [32];
  CharPtr         copy;
  ValNodePtr      head = NULL;
  IntRefBlockPtr  irp;
  SeqLocPtr       loc;
  CharPtr         ptr;
  CharPtr         ref;
  SeqLocPtr       slp;
  Int4            start;
  Int4            stop;
  CharPtr         tmp;

  if (gbseq == NULL || gbref == NULL || StringHasNoText (str) || rbp == NULL || bsp == NULL) return;

  copy = StringSave (str);

  /* link in reverse order, to be reversed in slash block */

  gbref->next = gbseq->references;
  gbseq->references = gbref;

  /* now parse or make ASN required default values for remaining fields */

  /* Match on the keyword itself.  The earlier test compared 12 characters */
  /* against a shorter literal and so depended on the exact column padding */
  /* after the keyword; a mismatch left every reference as "?".            */

  if (StringNCmp (copy, "REFERENCE", 9) == 0) {
    ref = copy + 9;

    /* NOTE(review): these sub-keyword patterns must match the indentation */
    /* actually emitted in the flatfile text - confirm                     */

    ptr = StringStr (ref, "\n AUTHORS");
    if (ptr == NULL) {
      ptr = StringStr (ref, "\n CONSRTM");
    }
    if (ptr == NULL) {
      ptr = StringStr (ref, ")\n");
      if (ptr != NULL) {
        ptr++;
      }
    }
    if (ptr != NULL) {
      *ptr = '\0';
      /* gbref->reference = StringSave (ref); */
      sprintf (buf, "%d", (int) rbp->serial);
      gbref->reference = StringSave (buf);
    }
  }

  if (gbref->reference == NULL) {
    gbref->reference = StringSave ("?");
  }

  CleanQualValue (gbref->reference);
  Asn2gnbkCompressSpaces (gbref->reference);

  if (gbref->journal == NULL) {
    gbref->journal = StringSave ("?");
  }

  CleanQualValue (gbref->journal);
  Asn2gnbkCompressSpaces (gbref->journal);

  MemFree (copy);

  if (rbp->sites == 1 || rbp->sites == 2) {
    gbref->position = StringSave ("sites");
  } else if (rbp->sites == 3) {
    /* no position is recorded for this case */
  } else {
    irp = (IntRefBlockPtr) rbp;
    loc = irp->loc;
    if (loc != NULL) {

      /* one "start..stop" piece per interval, later ones led by "; " */

      slp = SeqLocFindNext (loc, NULL);
      while (slp != NULL) {
        start = SeqLocStart (slp) + 1;
        stop = SeqLocStop (slp) + 1;
        if (head == NULL) {
          sprintf (buf, "%ld..%ld", (long) start, (long) stop);
        } else {
          sprintf (buf, "; %ld..%ld", (long) start, (long) stop);
        }
        ValNodeCopyStr (&head, 0, buf);
        slp = SeqLocFindNext (loc, slp);
      }
      tmp = MergeFFValNodeStrs (head);
      ValNodeFreeData (head);
      gbref->position = tmp;
    } else {
      start = 1;
      stop = bsp->length;
      sprintf (buf, "%ld..%ld", (long) start, (long) stop);
      gbref->position = StringSave (buf);
    }
  }
}
3422
IsCitSub(PubdescPtr pdp,CitSubPtr csp)3423 static Boolean IsCitSub (
3424 PubdescPtr pdp,
3425 CitSubPtr csp
3426 )
3427
3428 {
3429 ValNodePtr vnp;
3430
3431 if (csp != NULL) return TRUE;
3432 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3433 if (vnp->choice == PUB_Sub) return TRUE;
3434 }
3435 return FALSE;
3436 }
3437
/*
 * FF_www_muid
 *
 * Appends muid in decimal to ffstring.  When GetWWW (ajp) is set the
 * number is wrapped in an anchor whose target is the NCBI base URL
 * followed by link_muid and the number.
 */
static void FF_www_muid(
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  Int4 muid
)
{
  Char  idText [40];

  sprintf (idText, "%ld", (long) muid);

  if (! GetWWW (ajp)) {
    FFAddOneString (ffstring, idText, FALSE, FALSE, TILDE_IGNORE);
    return;
  }

  FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
  FF_Add_NCBI_Base_URL (ffstring, link_muid);
  FFAddTextToString (ffstring, NULL, idText, "\">", FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, idText, FALSE, FALSE, TILDE_IGNORE);
  FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
}
3458
GetJournalPubStatus(PubdescPtr pdp)3459 static Uint1 GetJournalPubStatus (PubdescPtr pdp)
3460
3461 {
3462 CitArtPtr cap;
3463 CitJourPtr cjp;
3464 ImprintPtr imp;
3465 ValNodePtr vnp;
3466
3467 if (pdp == NULL) return 0;
3468
3469 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3470 if (vnp->choice != PUB_Article) continue;
3471 cap = (CitArtPtr) vnp->data.ptrvalue;
3472 if (cap == NULL) continue;
3473 if (cap->from != 1) continue;
3474 cjp = (CitJourPtr) cap->fromptr;
3475 if (cjp == NULL) continue;
3476 imp = cjp->imp;
3477 if (imp == NULL) continue;
3478 return imp->pubstatus;
3479 }
3480
3481 return 0;
3482 }
3483
FormatReferenceBlock(Asn2gbFormatPtr afp,BaseBlockPtr bbp)3484 NLM_EXTERN CharPtr FormatReferenceBlock (
3485 Asn2gbFormatPtr afp,
3486 BaseBlockPtr bbp
3487 )
3488
3489 {
3490 SeqMgrAndContext acontext;
3491 AnnotDescPtr adp;
3492 IntAsn2gbJobPtr ajp;
3493 AuthListPtr alp;
3494 Asn2gbSectPtr asp;
3495 BioseqPtr bsp;
3496 Char buf [150];
3497 CitArtPtr cap;
3498 Char ch;
3499 CitJourPtr cjp;
3500 Boolean citArtIsoJta;
3501 CharPtr consortium;
3502 CitPatPtr cpp;
3503 CitRetractPtr crp;
3504 CitSubPtr csp = NULL;
3505 SeqMgrDescContext dcontext;
3506 CharPtr doi = NULL;
3507 SeqMgrFeatContext fcontext;
3508 Int4 gibbsq;
3509 GBReferencePtr gbref = NULL;
3510 GBSeqPtr gbseq;
3511 GBXrefPtr gxp;
3512 ValNodePtr head;
3513 Int2 i;
3514 ArticleIdPtr ids;
3515 ImprintPtr imp;
3516 IndxPtr index;
3517 IntRefBlockPtr irp;
3518 Boolean is_ed = FALSE;
3519 size_t len;
3520 SeqLocPtr loc = NULL;
3521 MedlineEntryPtr mep;
3522 Int4 muid = 0;
3523 Boolean needsPeriod = FALSE;
3524 SeqLocPtr nextslp;
3525 Boolean notFound;
3526 ObjMgrDataPtr omdp;
3527 PubdescPtr pdp = NULL;
3528 PubdescPtr pdpcopy = NULL;
3529 PubmedEntryPtr pep = NULL;
3530 Int4 pmid = 0;
3531 CharPtr prefix = NULL;
3532 Uint1 pubstatus;
3533 CharPtr pubstatnote;
3534 RefBlockPtr rbp;
3535 ValNodePtr remarks = NULL;
3536 CharPtr remprefix = NULL;
3537 SubmitBlockPtr sbp;
3538 SeqDescrPtr sdp;
3539 ErrSev sev;
3540 SeqFeatPtr sfp = NULL;
3541 SeqIdPtr sip;
3542 SeqLocPtr slp;
3543 SeqSubmitPtr ssp;
3544 Int4 start;
3545 Int4 stop;
3546 CharPtr str = NULL;
3547 Boolean strict_isojta;
3548 CharPtr suffix = NULL;
3549 BioseqPtr target;
3550 CharPtr tmp;
3551 Boolean trailingPeriod = TRUE;
3552 ValNodePtr vnp;
3553 StringItemPtr ffstring, temp;
3554
3555 if (afp == NULL || bbp == NULL) return NULL;
3556 rbp = (RefBlockPtr) bbp;
3557 ajp = afp->ajp;
3558 if (ajp == NULL) return NULL;
3559 asp = afp->asp;
3560 if (asp == NULL) return NULL;
3561 target = asp->target;
3562 bsp = asp->bsp;
3563 if (target == NULL || bsp == NULL) return NULL;
3564
3565 /* five-column feature table uses special code for formatting */
3566
3567 if (ajp->format == FTABLE_FMT) {
3568 irp = (IntRefBlockPtr) bbp;
3569 if (irp->loc != NULL) {
3570 if (irp->rb.pmid != 0 || irp->rb.muid != 0) {
3571 head = NULL;
3572 PrintFtableIntervals (&head, target, irp->loc, "REFERENCE", FALSE);
3573 if (irp->rb.pmid != 0) {
3574 sprintf (buf, "\t\t\tpmid\t%ld\n", (long) irp->rb.pmid);
3575 ValNodeCopyStr (&head, 0, buf);
3576 } else if (irp->rb.muid != 0) {
3577 sprintf (buf, "\t\t\tmuid\t%ld\n", (long) irp->rb.muid);
3578 ValNodeCopyStr (&head, 0, buf);
3579 }
3580 str = MergeFFValNodeStrs (head);
3581 ValNodeFreeData (head);
3582 }
3583 }
3584 return str;
3585 }
3586
3587 /* otherwise do regular flatfile formatting */
3588
3589 ffstring = FFGetString(ajp);
3590 if ( ffstring == NULL ) return NULL;
3591
3592 if (ajp->index) {
3593 index = &asp->index;
3594 } else {
3595 index = NULL;
3596 }
3597
3598 if (ajp->gbseq) {
3599 gbseq = &asp->gbseq;
3600 } else {
3601 gbseq = NULL;
3602 }
3603
3604 if (! StringHasNoText (rbp->string)) return StringSave (rbp->string);
3605
3606 /* could be descriptor, feature, annotdesc, or submit block citation */
3607
3608 if (rbp->itemtype == OBJ_SEQDESC) {
3609
3610 sdp = SeqMgrGetDesiredDescriptor (rbp->entityID, NULL, rbp->itemID, 0, NULL, &dcontext);
3611 if (sdp != NULL && dcontext.seqdesctype == Seq_descr_pub) {
3612 pdp = (PubdescPtr) sdp->data.ptrvalue;
3613 }
3614
3615 } else if (rbp->itemtype == OBJ_SEQFEAT) {
3616
3617 sfp = SeqMgrGetDesiredFeature (rbp->entityID, NULL, rbp->itemID, 0, NULL, &fcontext);
3618 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_PUB) {
3619 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
3620 }
3621
3622 } else if (rbp->itemtype == OBJ_ANNOTDESC) {
3623
3624 adp = SeqMgrGetDesiredAnnotDesc (rbp->entityID, NULL, rbp->itemID, &acontext);
3625 if (adp != NULL && acontext.annotdesctype == Annot_descr_pub) {
3626 pdp = (PubdescPtr) adp->data.ptrvalue;
3627 }
3628
3629 } else if (rbp->itemtype == OBJ_SEQSUB_CIT) {
3630
3631 omdp = ObjMgrGetData (rbp->entityID);
3632 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
3633 ssp = (SeqSubmitPtr) omdp->dataptr;
3634 if (ssp != NULL && ssp->datatype == 1) {
3635 sbp = ssp->sub;
3636 if (sbp != NULL) {
3637 csp = sbp->cit;
3638 }
3639 }
3640 }
3641 }
3642
3643 if (pdp == NULL && csp == NULL) return NULL;
3644
3645 temp = FFGetString(ajp);
3646 if ( temp == NULL ) {
3647 FFRecycleString(ajp, ffstring);
3648 return NULL;
3649 }
3650
3651 /* any justuids left at this point is RefSeq protein, and should be fetched */
3652
3653 irp = (IntRefBlockPtr) rbp;
3654 if (irp->justuids) {
3655 sev = ErrSetMessageLevel (SEV_MAX);
3656 if (rbp->pmid != 0) {
3657 pep = GetPubMedForUid (rbp->pmid);
3658 } else if (rbp->muid != 0) {
3659 pep = GetPubMedForUid (rbp->muid);
3660 }
3661 ErrSetMessageLevel (sev);
3662 if (pep != NULL) {
3663 mep = (MedlineEntryPtr) pep->medent;
3664 if (mep != NULL && mep->cit != NULL) {
3665 pdpcopy = AsnIoMemCopy ((Pointer) pdp,
3666 (AsnReadFunc) PubdescAsnRead,
3667 (AsnWriteFunc) PubdescAsnWrite);
3668 cap = AsnIoMemCopy ((Pointer) mep->cit,
3669 (AsnReadFunc) CitArtAsnRead,
3670 (AsnWriteFunc) CitArtAsnWrite);
3671 vnp = ValNodeAddPointer (&(pdpcopy->pub), PUB_Article, (Pointer) cap);
3672 pdp = pdpcopy;
3673 }
3674 }
3675 }
3676
3677 /* print serial number */
3678 FFStartPrint(temp, afp->format, 0, 12, "REFERENCE", 12, 5, 5, "RN", TRUE);
3679
3680 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3681 if (rbp->serial > 99) {
3682 sprintf (buf, "%d ", (int) rbp->serial);
3683 } else {
3684 sprintf (buf, "%d", (int) rbp->serial);
3685 }
3686 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3687 sprintf (buf, "[%d]", (int) rbp->serial);
3688 }
3689
3690 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
3691
3692 /* print base range */
3693
3694 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3695
3696 if (rbp->sites != 3) {
3697 FFAddNChar(temp, ' ', 15 - temp->pos, FALSE);
3698 }
3699 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3700
3701 if (rbp->sites == 0) {
3702 FFLineWrap(ajp, ffstring, temp, 0, 5, ASN2FF_EMBL_MAX, "RN");
3703 FFRecycleString(ajp, temp);
3704 temp = FFGetString(ajp);
3705 FFStartPrint(temp, afp->format, 0, 0, NULL, 0, 5, 5, "RP", FALSE);
3706 }
3707 }
3708
3709 if (rbp->sites == 1 || rbp->sites == 2) {
3710
3711 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3712 FFAddOneString (temp, "(sites)", FALSE, FALSE, TILDE_TO_SPACES);
3713 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3714 } else {
3715 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3716 }
3717 } else if (rbp->sites == 3) {
3718 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3719 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3720 } else {
3721 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3722 }
3723 } else {
3724 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3725 FFAddNChar(temp, ' ', 15 - temp->pos, FALSE);
3726 if (afp->format == GENBANK_FMT) {
3727 FFAddOneString (temp, "(bases ", FALSE, FALSE, TILDE_TO_SPACES);
3728 } else {
3729 FFAddOneString (temp, "(residues ", FALSE, FALSE, TILDE_TO_SPACES);
3730 }
3731 }
3732
3733 irp = (IntRefBlockPtr) rbp;
3734 loc = irp->loc;
3735
3736 if (loc != NULL) {
3737 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3738 suffix = "; ";
3739 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3740 suffix = ", ";
3741 }
3742
3743 slp = SeqLocFindNext (loc, NULL);
3744 while (slp != NULL) {
3745 nextslp = SeqLocFindNext (loc, slp);
3746 start = SeqLocStart (slp) + 1;
3747 stop = SeqLocStop (slp) + 1;
3748 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3749 sprintf (buf, "%ld to %ld", (long) start, (long) stop);
3750 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3751 sprintf (buf, "%ld-%ld", (long) start, (long) stop);
3752 }
3753 if (nextslp == NULL) {
3754 suffix = NULL;
3755 }
3756 FFAddTextToString (temp, NULL, buf, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3757 slp = nextslp;
3758 }
3759
3760 } else {
3761
3762 /* code still used for ssp->cit */
3763
3764 start = 1;
3765 stop = bsp->length;
3766 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3767 sprintf (buf, "%ld to %ld", (long) start, (long) stop);
3768 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3769 sprintf (buf, "%ld-%ld", (long) start, (long) stop);
3770 }
3771 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
3772 }
3773
3774 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3775 FFAddOneString (temp, ")", FALSE, FALSE, TILDE_TO_SPACES);
3776 }
3777 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3778 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3779 } else {
3780 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RP");
3781 }
3782 }
3783
3784 if (gbseq != NULL) {
3785 gbref = GBReferenceNew ();
3786 }
3787
3788 /* print author list */
3789
3790 str = NULL;
3791 consortium = NULL;
3792
3793 alp = GetAuthListPtr (pdp, csp);
3794 if (alp != NULL) {
3795 str = GetAuthorsString (afp->format, alp, &consortium, index, gbref);
3796 TrimSpacesAroundString (str);
3797 Asn2gnbkCompressSpaces (str);
3798 if (StringCmp (str, "?") == 0) {
3799 str = MemFree (str);
3800 }
3801 }
3802
3803 if (str != NULL || StringHasNoText (consortium)) {
3804 FFRecycleString(ajp, temp);
3805 temp = FFGetString(ajp);
3806 FFStartPrint(temp, afp->format, 2, 12, "AUTHORS", 12, 5, 5, "RA", FALSE);
3807
3808 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3809 suffix = NULL;
3810 trailingPeriod = TRUE;
3811 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3812 trailingPeriod = FALSE;
3813 len = StringLen (str);
3814 if (len > 0 && str != NULL && str [len - 1] != '.') {
3815 suffix = ".;";
3816 } else {
3817 suffix = ";";
3818 }
3819 }
3820
3821 /* if no authors were found, period will still be added by this call */
3822 if (str != NULL) {
3823 FFAddTextToString (temp, NULL, str, suffix, trailingPeriod, FALSE, TILDE_TO_SPACES);
3824 } else if (StringHasNoText (consortium)) {
3825 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3826 FFAddOneChar(temp, '.', FALSE);
3827 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3828 FFAddOneChar(temp, ';', FALSE);
3829 }
3830 }
3831
3832 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3833 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3834 } else {
3835 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RA");
3836 }
3837 }
3838 MemFree (str);
3839
3840 /* print consortium */
3841
3842 FFRecycleString(ajp, temp);
3843 temp = FFGetString(ajp);
3844 if (! StringHasNoText (consortium)) {
3845 FFStartPrint (temp, afp->format, 2, 12, "CONSRTM", 12, 5, 5, "RG", FALSE);
3846 FFAddTextToString (temp, NULL, consortium, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3847 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3848 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3849 } else {
3850 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RG");
3851 }
3852 }
3853 MemFree (consortium);
3854
3855 /* print title */
3856 FFRecycleString(ajp, temp);
3857 temp = FFGetString(ajp);
3858
3859 str = GetPubTitle (afp->format, pdp, csp);
3860 CleanPubTitle (str);
3861 StrStripSpaces (str);
3862
3863 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3864 prefix = NULL;
3865 suffix = NULL;
3866 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3867 if (str != NULL) {
3868 prefix = "\"";
3869 suffix = "\";";
3870 } else {
3871 prefix = NULL;
3872 suffix = ";";
3873 }
3874 }
3875
3876 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3877 if (! StringHasNoText (str)) {
3878 FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
3879
3880 FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3881 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3882 }
3883 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3884 FFStartPrint (temp, afp->format, 2, 12, "TITLE", 12, 5, 5, "RT", FALSE);
3885 if (! StringHasNoText (str)) {
3886
3887 FFAddTextToString (temp, prefix, str, suffix, FALSE, FALSE, TILDE_TO_SPACES);
3888
3889 } else {
3890 FFAddOneChar (temp, ';', FALSE);
3891 }
3892 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RT");
3893 }
3894
3895 if (gbseq != NULL) {
3896 if (gbref != NULL) {
3897 gbref->title = StringSaveNoNull (str);
3898 }
3899 }
3900
3901 MemFree (str);
3902
3903 /* print journal */
3904 FFRecycleString(ajp, temp);
3905 temp = FFGetString(ajp);
3906
3907 FFStartPrint (temp, afp->format, 2, 12, "JOURNAL", 12, 5, 5, "RL", FALSE);
3908
3909 /* Only GenBank/EMBL/DDBJ require ISO JTA in ENTREZ/RELEASE modes (RefSeq should later) */
3910
3911 citArtIsoJta = ajp->flags.citArtIsoJta;
3912 strict_isojta = FALSE;
3913 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3914 if (sip->choice == SEQID_GENBANK ||
3915 sip->choice == SEQID_EMBL ||
3916 sip->choice == SEQID_DDBJ ||
3917 /* sip->choice == SEQID_OTHER || */
3918 sip->choice == SEQID_TPG ||
3919 sip->choice == SEQID_TPE ||
3920 sip->choice == SEQID_TPD) {
3921 strict_isojta = TRUE;
3922 }
3923 if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
3924 is_ed = TRUE;
3925 }
3926 }
3927 if (! strict_isojta) {
3928 citArtIsoJta = FALSE;
3929 }
3930
3931 str = GetPubJournal (afp->format, ajp->mode, ajp->flags.dropBadCitGens,
3932 is_ed, ajp->flags.noAffilOnUnpub, citArtIsoJta,
3933 pdp, csp, bsp->id, index, ajp);
3934 if (str == NULL) {
3935 str = StringSave ("Unpublished");
3936 }
3937 StrStripSpaces (str);
3938 TrimSpacesAroundString (str);
3939 Asn2gnbkCompressSpaces (str);
3940
3941 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3942 needsPeriod = FALSE;
3943 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
3944 if (! IsCitSub (pdp, csp)) {
3945 needsPeriod = TRUE;
3946 }
3947 }
3948
3949 FFAddOneString (temp, str, FALSE, FALSE, TILDE_IGNORE);
3950 if (needsPeriod) {
3951 FFAddOneChar(temp, '.', FALSE);
3952 }
3953
3954 if (gbseq != NULL) {
3955 if (gbref != NULL) {
3956 gbref->journal = StringSaveNoNull (str);
3957 tmp = gbref->journal;
3958 if (tmp != NULL) {
3959 ch = *tmp;
3960 while (ch != '\0') {
3961 if (ch == '\n' || ch == '\r' || ch == '\t') {
3962 *tmp = ' ';
3963 }
3964 tmp++;
3965 ch = *tmp;
3966 }
3967 TrimSpacesAroundString (gbref->journal);
3968 }
3969 }
3970 }
3971
3972 MemFree (str);
3973 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
3974 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
3975 } else {
3976 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RL");
3977 }
3978
3979 if (gbseq != NULL) {
3980 if (gbref != NULL) {
3981 if (pdp != NULL) {
3982 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
3983 if (vnp->choice == PUB_Article) {
3984 cap = (CitArtPtr) vnp->data.ptrvalue;
3985 if (cap != NULL) {
3986 for (ids = cap->ids; ids != NULL; ids = ids->next) {
3987 if (ids->choice == ARTICLEID_DOI) {
3988 tmp = (CharPtr) ids->data.ptrvalue;
3989 if (StringDoesHaveText (tmp) && StringNCmp (tmp, "10.", 3) == 0) {
3990 gxp = GBXrefNew ();
3991 if (gxp != NULL) {
3992 gxp->dbname = StringSave ("doi");
3993 gxp->id = StringSave (tmp);
3994 gxp->next = gbref->xref;
3995 gbref->xref = gxp;
3996 }
3997 }
3998 }
3999 }
4000 }
4001 }
4002 }
4003 }
4004 }
4005 }
4006
4007 /* print muid */
4008 FFRecycleString(ajp, temp);
4009 temp = FFGetString(ajp);
4010
4011 pmid = GetPmid (pdp);
4012 muid = GetMuid (pdp);
4013
4014 if (pmid == 0 && muid > 0) {
4015 FFStartPrint (temp, afp->format, 2, 12, "MEDLINE", 12, 5, 5, "RX", FALSE);
4016
4017 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4018 FF_www_muid (ajp, temp, muid);
4019 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4020 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4021 sprintf (buf, "MEDLINE; %ld.", (long) muid);
4022 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
4023 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
4024 }
4025 }
4026
4027 FFRecycleString(ajp, temp);
4028 temp = FFGetString(ajp);
4029
4030 if (pmid > 0) {
4031 FFStartPrint (temp, afp->format, 3, 12, "PUBMED", 12, 5, 5, "RX", FALSE);
4032 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4033 FF_www_muid (ajp, temp, pmid);
4034 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4035 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4036 sprintf (buf, "PUBMED; %ld.", (long) pmid);
4037 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_TO_SPACES);
4038 FFLineWrap(ajp, ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "RX");
4039 }
4040 }
4041 FFRecycleString(ajp, temp);
4042
4043 if (gbseq != NULL) {
4044 if (gbref != NULL) {
4045 gbref->pubmed = pmid;
4046 }
4047 }
4048
4049 if (pdp == NULL) {
4050
4051 if (csp != NULL) {
4052 if (! StringHasNoText (csp->descr)) {
4053 FFRecycleString(ajp, temp);
4054 temp = FFGetString(ajp);
4055
4056 ValNodeCopyStr (&remarks, 0, csp->descr);
4057 FFStartPrint (temp, afp->format, 2, 12, "REMARK", 12, 5, 5, NULL, FALSE);
4058 /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
4059 AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
4060 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4061 }
4062 }
4063
4064 str = FFToCharPtr(ffstring);
4065
4066 if (gbseq != NULL) {
4067 if (gbref != NULL) {
4068 AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
4069 }
4070 }
4071
4072 FFRecycleString(ajp, ffstring);
4073 FFRecycleString(ajp, temp);
4074 if (pep != NULL) {
4075 PubmedEntryFree (pep);
4076 }
4077 if (pdpcopy != NULL) {
4078 PubdescFree (pdpcopy);
4079 }
4080
4081 return str;
4082 }
4083
4084
4085 /* !!! remainder of fields are only for GenBank !!! */
4086
4087 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4088
4089 prefix = "REMARK";
4090
4091 cpp = NULL;
4092 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4093 if (vnp->choice == PUB_Patent) {
4094 cpp = (CitPatPtr) vnp->data.ptrvalue;
4095 }
4096 }
4097 if (cpp != NULL && ajp != NULL && ajp->mode == ENTREZ_MODE) {
4098 if (StringCmp (cpp->country, "US") == 0) {
4099 if (StringDoesHaveText (cpp->number)) {
4100 FFRecycleString(ajp, temp);
4101 temp = FFGetString(ajp);
4102
4103 sprintf (buf, "CAMBIA Patent Lens: %s %s", cpp->country, cpp->number);
4104 if (remprefix != NULL) {
4105 ValNodeCopyStr (&remarks, 0, remprefix);
4106 }
4107 ValNodeCopyStr (&remarks, 0, buf);
4108 remprefix = "; ";
4109 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4110 if (GetWWW (ajp)) {
4111 sprintf (buf, "CAMBIA Patent Lens: %s ", cpp->country);
4112 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4113 FFAddOneString (temp, "<a href=\"", FALSE, FALSE, TILDE_EXPAND);
4114 FFAddOneString (temp, link_cambia, FALSE, FALSE, TILDE_EXPAND);
4115 FFAddOneString (temp, cpp->country, FALSE, FALSE, TILDE_EXPAND);
4116 FFAddOneString (temp, cpp->number, FALSE, FALSE, TILDE_EXPAND);
4117 FFAddOneString (temp, "#list\">", FALSE, FALSE, TILDE_EXPAND);
4118 FFAddOneString (temp, cpp->number, FALSE, FALSE, TILDE_EXPAND);
4119 FFAddOneString (temp, "</a>", FALSE, FALSE, TILDE_EXPAND);
4120 } else {
4121 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4122 }
4123 FFLineWrap (ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4124 prefix = NULL;
4125 }
4126 }
4127 }
4128
4129 if (pdp->comment != NULL) {
4130 for (i = 0, notFound = TRUE; notFound && remarksText [i] != NULL; i++) {
4131 if (StringCmp (pdp->comment, remarksText [i]) == 0) {
4132 notFound = FALSE;
4133 }
4134 }
4135 if (notFound) {
4136 FFRecycleString(ajp, temp);
4137 temp = FFGetString(ajp);
4138
4139 if (remprefix != NULL) {
4140 ValNodeCopyStr (&remarks, 0, remprefix);
4141 }
4142 ValNodeCopyStr (&remarks, 0, pdp->comment);
4143 remprefix = "; ";
4144 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4145 FFAddOneString (temp, pdp->comment, FALSE, TRUE, TILDE_EXPAND);
4146 /* AddCommentWithURLlinks(ajp, temp, NULL, pdp->comment, NULL); */
4147 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4148 prefix = NULL;
4149
4150 if (gbseq != NULL) {
4151 if (gbref != NULL) {
4152 /*
4153 gbref->remark = StringSave (pdp->comment);
4154 */
4155 }
4156 }
4157
4158 }
4159 }
4160
4161 gibbsq = 0;
4162 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4163 if (sip->choice == SEQID_GIBBSQ) {
4164 gibbsq = sip->data.intvalue;
4165 }
4166 }
4167 csp = NULL;
4168 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4169 if (vnp->choice == PUB_Sub) {
4170 csp = (CitSubPtr) vnp->data.ptrvalue;
4171 }
4172 }
4173 if (gibbsq > 0 /* && csp == NULL */) {
4174 FFRecycleString(ajp, temp);
4175 temp = FFGetString(ajp);
4176
4177 sprintf (buf, "GenBank staff at the National Library of Medicine created this entry [NCBI gibbsq %ld] from the original journal article.", (long) gibbsq);
4178 if (remprefix != NULL) {
4179 ValNodeCopyStr (&remarks, 0, remprefix);
4180 }
4181 ValNodeCopyStr (&remarks, 0, buf);
4182 remprefix = "; ";
4183 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4184 FFAddOneString (temp, buf, FALSE, FALSE, TILDE_EXPAND);
4185 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4186 prefix = NULL;
4187
4188 /* gibbsq comment section (fields may be copied from degenerate pubdesc) */
4189
4190 str = pdp->fig;
4191 if (StringHasNoText (str)) {
4192 str = irp->fig;
4193 }
4194 if (! StringHasNoText (str)) {
4195 FFRecycleString(ajp, temp);
4196 temp = FFGetString(ajp);
4197
4198 sprintf (buf, "This sequence comes from %s", str);
4199 if (remprefix != NULL) {
4200 ValNodeCopyStr (&remarks, 0, remprefix);
4201 }
4202 ValNodeCopyStr (&remarks, 0, buf);
4203 remprefix = "; ";
4204 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4205 FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
4206 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4207 prefix = NULL;
4208 }
4209
4210 if (pdp->poly_a || irp->poly_a) {
4211 FFRecycleString(ajp, temp);
4212 temp = FFGetString(ajp);
4213
4214 if (remprefix != NULL) {
4215 ValNodeCopyStr (&remarks, 0, remprefix);
4216 }
4217 ValNodeCopyStr (&remarks, 0, "Polyadenylate residues occurring in the figure were omitted from the sequence.");
4218 remprefix = "; ";
4219 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4220 FFAddOneString (temp, "Polyadenylate residues occurring in the figure were omitted from the sequence.", TRUE, TRUE, TILDE_EXPAND);
4221 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4222 prefix = NULL;
4223 }
4224
4225 str = pdp->maploc;
4226 if (StringHasNoText (str)) {
4227 str = irp->maploc;
4228 }
4229 if (! StringHasNoText (str)) {
4230 FFRecycleString(ajp, temp);
4231 temp = FFGetString(ajp);
4232
4233 sprintf (buf, "Map location: %s", str);
4234 if (remprefix != NULL) {
4235 ValNodeCopyStr (&remarks, 0, remprefix);
4236 }
4237 ValNodeCopyStr (&remarks, 0, buf);
4238 remprefix = "; ";
4239 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4240 FFAddOneString (temp, buf, TRUE, TRUE, TILDE_EXPAND);
4241 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4242 prefix = NULL;
4243 }
4244
4245 }
4246
4247 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4248 if (vnp->choice == PUB_Article) {
4249 cap = (CitArtPtr) vnp->data.ptrvalue;
4250 if (cap != NULL && cap->from == 1) {
4251 cjp = (CitJourPtr) cap->fromptr;
4252 if (cjp != NULL) {
4253 imp = cjp->imp;
4254 if (imp != NULL) {
4255 crp = imp->retract;
4256 if (crp != NULL) {
4257 if (crp->type == 1) {
4258 FFRecycleString(ajp, temp);
4259 temp = FFGetString(ajp);
4260
4261 len = StringLen (crp->exp) + 30;
4262 str = MemNew (sizeof (Char) * len);
4263 if (str != NULL) {
4264 StringCpy (str, "Retracted");
4265 if (StringDoesHaveText (crp->exp)) {
4266 StringCat (str, ":[");
4267 StringCat (str, crp->exp);
4268 StringCat (str, "]");
4269 }
4270 if (remprefix != NULL) {
4271 ValNodeCopyStr (&remarks, 0, remprefix);
4272 }
4273 ValNodeCopyStr (&remarks, 0, str);
4274 remprefix = "; ";
4275 str = MemFree (str);
4276 }
4277 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4278 FFAddOneString (temp, "Retracted", FALSE, FALSE, TILDE_TO_SPACES);
4279 if (StringDoesHaveText (crp->exp)) {
4280 FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4281 }
4282 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4283 prefix = NULL;
4284 } else if (crp->type == 3) {
4285 FFRecycleString(ajp, temp);
4286 temp = FFGetString(ajp);
4287
4288 len = StringLen (crp->exp) + 30;
4289 str = MemNew (sizeof (Char) * len);
4290 if (str != NULL) {
4291 StringCpy (str, "Erratum");
4292 if (StringDoesHaveText (crp->exp)) {
4293 StringCat (str, ":[");
4294 StringCat (str, crp->exp);
4295 StringCat (str, "]");
4296 }
4297 if (remprefix != NULL) {
4298 ValNodeCopyStr (&remarks, 0, remprefix);
4299 }
4300 ValNodeCopyStr (&remarks, 0, str);
4301 remprefix = "; ";
4302 str = MemFree (str);
4303 }
4304 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4305 FFAddOneString (temp, "Erratum", FALSE, FALSE, TILDE_TO_SPACES);
4306 if (StringDoesHaveText (crp->exp)) {
4307 FFAddTextToString (temp, ":[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4308 }
4309 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4310 prefix = NULL;
4311 } else if (crp->type == 4) {
4312 FFRecycleString(ajp, temp);
4313 temp = FFGetString(ajp);
4314
4315 len = StringLen (crp->exp) + 30;
4316 str = MemNew (sizeof (Char) * len);
4317 if (str != NULL) {
4318 StringCpy (str, "Correction");
4319 if (StringDoesHaveText (crp->exp)) {
4320 StringCat (str, " to:[");
4321 StringCat (str, crp->exp);
4322 StringCat (str, "]");
4323 }
4324 if (remprefix != NULL) {
4325 ValNodeCopyStr (&remarks, 0, remprefix);
4326 }
4327 ValNodeCopyStr (&remarks, 0, str);
4328 remprefix = "; ";
4329 str = MemFree (str);
4330 }
4331 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4332 FFAddOneString (temp, "Correction", FALSE, FALSE, TILDE_TO_SPACES);
4333 if (StringDoesHaveText (crp->exp)) {
4334 FFAddTextToString (temp, " to:[", crp->exp, "]", FALSE, TRUE, TILDE_EXPAND);
4335 }
4336 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4337 prefix = NULL;
4338 }
4339 }
4340 }
4341 }
4342 }
4343 } else if (vnp->choice == PUB_Sub) {
4344 csp = (CitSubPtr) vnp->data.ptrvalue;
4345 if (csp != NULL) {
4346 if (! StringHasNoText (csp->descr)) {
4347 FFRecycleString(ajp, temp);
4348 temp = FFGetString(ajp);
4349
4350 if (remprefix != NULL) {
4351 ValNodeCopyStr (&remarks, 0, remprefix);
4352 }
4353 ValNodeCopyStr (&remarks, 0, csp->descr);
4354 remprefix = "; ";
4355 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4356 /* FFAddOneString (temp, csp->descr, FALSE, TRUE, TILDE_EXPAND); */
4357 AddCommentWithURLlinks(ajp, temp, NULL, csp->descr, NULL);
4358 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4359 prefix = NULL;
4360 }
4361 }
4362 }
4363 }
4364
4365 pubstatnote = NULL;
4366 pubstatus = GetJournalPubStatus (pdp);
4367 if (pubstatus == 3) {
4368 pubstatnote = "Publication Status: Online-Only";
4369 } else if (pubstatus == 10) {
4370 pubstatnote = "Publication Status: Available-Online prior to print";
4371 }
4372 if (StringDoesHaveText (pubstatnote)) {
4373 FFRecycleString(ajp, temp);
4374 temp = FFGetString(ajp);
4375
4376 if (remprefix != NULL) {
4377 ValNodeCopyStr (&remarks, 0, remprefix);
4378 }
4379 ValNodeCopyStr (&remarks, 0, pubstatnote);
4380 remprefix = "; ";
4381 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4382 FFAddOneString (temp, pubstatnote, FALSE, FALSE, TILDE_EXPAND);
4383 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4384 prefix = NULL;
4385 }
4386
4387 }
4388
4389 if (pmid == 0 && muid == 0) {
4390 doi = GetDOI (pdp);
4391 if (StringDoesHaveText (doi) && StringNCmp (doi, "10.", 3) == 0) {
4392 FFRecycleString(ajp, temp);
4393 temp = FFGetString(ajp);
4394
4395 if (remprefix != NULL) {
4396 ValNodeCopyStr (&remarks, 0, remprefix);
4397 }
4398 remprefix = "; ";
4399 FFStartPrint (temp, afp->format, 2, 12, prefix, 12, 5, 5, NULL, FALSE);
4400 if (GetWWW (ajp) && (! CommentHasSuspiciousHtml (ajp, doi))) {
4401 FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND);
4402 FFAddOneString (temp, "<a href=\"", FALSE, FALSE, TILDE_EXPAND);
4403 FFAddOneString (temp, link_doi, FALSE, FALSE, TILDE_EXPAND);
4404 FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4405 FFAddOneString (temp, "\">", FALSE, FALSE, TILDE_EXPAND);
4406 FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4407 FFAddOneString (temp, "</a>", FALSE, FALSE, TILDE_EXPAND);
4408 } else {
4409 FFAddOneString (temp, "DOI: ", FALSE, FALSE, TILDE_EXPAND);
4410 FFAddOneString (temp, doi, FALSE, FALSE, TILDE_EXPAND);
4411 }
4412 FFLineWrap(ajp, ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
4413 prefix = NULL;
4414 }
4415 }
4416
4417 str = FFToCharPtr(ffstring);
4418
4419 if (gbseq != NULL) {
4420 if (gbref != NULL) {
4421 if (remarks != NULL) {
4422 gbref->remark = MergeFFValNodeStrs (remarks);
4423 }
4424
4425 AddReferenceToGbseq (gbseq, gbref, str, rbp, bsp);
4426 }
4427 }
4428 ValNodeFreeData (remarks);
4429
4430 FFRecycleString(ajp, ffstring);
4431 FFRecycleString(ajp, temp);
4432 if (pep != NULL) {
4433 PubmedEntryFree (pep);
4434 }
4435 if (pdpcopy != NULL) {
4436 PubdescFree (pdpcopy);
4437 }
4438
4439 return str;
4440 }
4441
4442
4443