1 /*****************************************************************************
2 *
3 * seqget.c
4 * entrez version
5 *
6 * "Fasta style" SeqIds include a string indicating the class of SeqId,
7 * vertical bar, then fields from the SeqId separated by vertical bar
8 * If an (OPTIONAL) field is missing, the vertical bar must still be
9 * there.
10 *
11 * local = lcl|integer or string
12 * gibbsq = bbs|integer
13 * gibbmt = bbm|integer
14 * giim = gim|integer
15 * genbank = gb|accession|locus
16 * embl = emb|accession|locus
17 * pir = pir|accession|name
18 * swissprot = sp|accession|name
19 * patent = pat|country|patent number (string)|seq number (integer)
20 * other = oth|accession|name|release
21 * general = gnl|database(string)|id (string or number)
22 * gi = gi|integer
23 * ddbj = dbj|accession|locus
24 * prf = prf|accession|name
25 * pdb = pdb|entry name (string)|chain id (char)
26 *
27 *****************************************************************************/
28 #include <accentr.h>
29 #include <sequtil.h>
30 #include <tofasta.h>
31 #include <asn2ff.h>
32
33 #define NUMARGS 5
34 Args myargs[NUMARGS] = {
35 {"Filename for output ","stdout", NULL,NULL,FALSE,'o',ARG_FILE_OUT, 0.0,0,NULL},
36 {"Output type: 1=text asn.1 2=binary asn.1 3=genbank 4=genpept 5=fasta",
37 "1", "1", "5", FALSE, 't', ARG_INT, 0.0, 0, NULL } ,
38 {"GI id for single Bioseq to dump" ,"0","1","99999999",TRUE,'g',ARG_INT,0.0,0,NULL},
39 {"Maximum complexity of SeqEntry: 0=as stored, 1=Bioseq, 2=Seq-set, 3=nuc-prot set " ,"0","0","3",TRUE,'c',ARG_INT,0.0,0,NULL},
40 {"Fasta style SeqId ENCLOSED IN QUOTES:\nlcl|int or str bbs|int bbm|int gb|acc|loc\nemb|acc|loc pir|acc|name sp|acc|name\npat|country|patent|seq gi|int\ndbj|acc|loc prf|acc|name pdb|entry|chain ",
41 NULL,NULL,NULL,TRUE,'s',ARG_STRING,0.0,0,NULL}};
42
Main(void)43 Int2 Main(void)
44 {
45 Int2 retcode, /* Default is nuc-prot */
46 outtype;
47
48 SeqIdPtr sip=NULL; /* Same as a ValNodePtr, generic data ptr implemented */
49 /* as a choice and a union. */
50
51 Int4 gi;
52 SeqEntryPtr sep;
53 AsnIoPtr asnout=NULL;
54 FILE * fp=NULL;
55 Boolean is_network;
56 Char tbuf[40];
57 CharPtr outmode;
58
59 /*
60 ** Get program arguments
61 */
62
63 if ( !GetArgs("SeqGet 1.0", NUMARGS, myargs) ) return 1;
64
65 /*
66 ** Set parameters from the command line
67 */
68
69 outtype = (Int2)myargs[1].intvalue;
70 gi = myargs[2].intvalue;
71 retcode = (Int2)myargs[3].intvalue;
72
73 if (myargs[4].strvalue != NULL)
74 {
75 if (gi)
76 {
77 ErrPostEx(SEV_FATAL, 1,0, "Use only one of -g or -s");
78 return 1;
79 }
80
81 sip = SeqIdParse((CharPtr)(myargs[4].strvalue));
82 if (sip == NULL)
83 {
84 ErrPostEx(SEV_FATAL, 1,0, "Can't parse [%s]",
85 (CharPtr)(myargs[4].strvalue));
86 return 1;
87 }
88 }
89 else if (! gi)
90 {
91 ErrPostEx(SEV_FATAL, 1,0, "Must supply one of -g or -s");
92 return 1;
93 }
94
95 /*
96 ** Initialize, open and otherwise prepare for CD-ROM or Network access.
97 */
98
99 if ( !EntrezInit("SeqGet", FALSE, &is_network) ) {
100 ErrPostEx(SEV_FATAL, 1,0, "Can't initialize Entrez");
101 return 1;
102 }
103
104 if (sip != NULL)
105 {
106 gi = EntrezFindSeqId(sip);
107 if (! gi)
108 {
109 EntrezFini();
110 SeqIdPrint(sip, tbuf, PRINTID_FASTA_SHORT);
111 ErrPostEx(SEV_FATAL, 1,0, "Couldn't find SeqId [%s]", tbuf);
112 return 1;
113 }
114 SeqIdFree(sip);
115 }
116
117 sep = EntrezSeqEntryGet(gi, retcode);
118 EntrezFini();
119
120 if (sep == NULL)
121 {
122 ErrPostEx(SEV_FATAL, 1,0,"Could not retrieve entry for GI %ld", (long)gi);
123 return 1;
124 }
125
126 outmode = "w";
127 switch (outtype)
128 {
129 case 2:
130 outmode = "wb";
131 case 1:
132 asnout = AsnIoOpen((CharPtr)myargs[0].strvalue, outmode);
133 if (asnout == NULL)
134 {
135 ErrPostEx(SEV_FATAL, 1,0, "Could not open [%s] for asn output",
136 myargs[0].strvalue);
137 return 1;
138 }
139 break;
140 case 3:
141 case 4:
142 case 5:
143 fp = FileOpen((CharPtr)myargs[0].strvalue, outmode);
144 if (fp == NULL)
145 {
146 ErrPostEx(SEV_FATAL, 1,0, "Could not open [%s] for asn output",
147 myargs[0].strvalue);
148 return 1;
149 }
150 break;
151 }
152
153 switch (outtype)
154 {
155 case 1:
156 case 2:
157 SeqEntryAsnWrite(sep, asnout, NULL);
158 break;
159 case 3:
160 SeqEntryToFlat(sep, fp, GENBANK_FMT, RELEASE_MODE);
161 break;
162 case 4:
163 SeqEntryToFlat(sep, fp, GENPEPT_FMT, RELEASE_MODE);
164 break;
165 case 5:
166 SeqEntryToFasta(sep, fp, TRUE); /* nuc acids */
167 SeqEntryToFasta(sep, fp, FALSE); /* proteins */
168 break;
169 }
170
171
172 AsnIoClose(asnout);
173 FileClose(fp);
174 SeqEntryFree(sep);
175
176 return 0;
177 }
178
179
180