1 /*****************************************************************************
2 *
3 *   seqget.c
4 *     entrez version
5 *
6 *   "Fasta style" SeqIds include a string indicating the class of SeqId,
7 *      vertical bar, then fields from the SeqId separated by vertical bar
8 *      If an (OPTIONAL) field is missing, the vertical bar must still be
9 *      there.
10 *
11 * local = lcl|integer or string
12 * gibbsq = bbs|integer
13 * gibbmt = bbm|integer
14 * giim = gim|integer
15 * genbank = gb|accession|locus
16 * embl = emb|accession|locus
17 * pir = pir|accession|name
18 * swissprot = sp|accession|name
19 * patent = pat|country|patent number (string)|seq number (integer)
20 * other = oth|accession|name|release
21 * general = gnl|database(string)|id (string or number)
22 * gi = gi|integer
23 * ddbj = dbj|accession|locus
24 * prf = prf|accession|name
25 * pdb = pdb|entry name (string)|chain id (char)
26 *
27 *****************************************************************************/
28 #include <accentr.h>
29 #include <sequtil.h>
30 #include <tofasta.h>
31 #include <asn2ff.h>
32 
33 #define NUMARGS 5
34 Args myargs[NUMARGS] = {
35 	{"Filename for output ","stdout", NULL,NULL,FALSE,'o',ARG_FILE_OUT, 0.0,0,NULL},
36 	{"Output type: 1=text asn.1 2=binary asn.1 3=genbank 4=genpept 5=fasta",
37 	  "1", "1", "5", FALSE, 't', ARG_INT, 0.0, 0, NULL } ,
38 	{"GI id for single Bioseq to dump" ,"0","1","99999999",TRUE,'g',ARG_INT,0.0,0,NULL},
39 	{"Maximum complexity of SeqEntry: 0=as stored, 1=Bioseq, 2=Seq-set, 3=nuc-prot set " ,"0","0","3",TRUE,'c',ARG_INT,0.0,0,NULL},
40  	{"Fasta style SeqId ENCLOSED IN QUOTES:\nlcl|int or str bbs|int bbm|int gb|acc|loc\nemb|acc|loc pir|acc|name sp|acc|name\npat|country|patent|seq gi|int\ndbj|acc|loc prf|acc|name pdb|entry|chain  ",
41 	NULL,NULL,NULL,TRUE,'s',ARG_STRING,0.0,0,NULL}};
42 
Main(void)43 Int2 Main(void)
44 {
45 	Int2         retcode,  /* Default is nuc-prot         */
46 		         outtype;
47 
48 	SeqIdPtr     sip=NULL;      /* Same as a ValNodePtr, generic data ptr implemented */
49 	                       /*   as a choice and a union. */
50 
51 	Int4         gi;
52 	SeqEntryPtr  sep;
53 	AsnIoPtr     asnout=NULL;
54 	FILE *       fp=NULL;
55 	Boolean      is_network;
56 	Char tbuf[40];
57 	CharPtr outmode;
58 
59 	/*
60 	** Get program arguments
61 	*/
62 
63 	if ( !GetArgs("SeqGet 1.0", NUMARGS, myargs) ) return 1;
64 
65 	/*
66 	** Set parameters from the command line
67 	*/
68 
69 	outtype = (Int2)myargs[1].intvalue;
70 	gi = myargs[2].intvalue;
71 	retcode = (Int2)myargs[3].intvalue;
72 
73 	if (myargs[4].strvalue != NULL)
74 	{
75 		if (gi)
76 		{
77 			ErrPostEx(SEV_FATAL, 1,0, "Use only one of -g or -s");
78 			return 1;
79 		}
80 
81 		sip = SeqIdParse((CharPtr)(myargs[4].strvalue));
82 		if (sip == NULL)
83 		{
84 			ErrPostEx(SEV_FATAL, 1,0, "Can't parse [%s]",
85 				(CharPtr)(myargs[4].strvalue));
86 			return 1;
87 		}
88 	}
89         else if (! gi)
90 	{
91 		ErrPostEx(SEV_FATAL, 1,0, "Must supply one of -g or -s");
92 		return 1;
93 	}
94 
95 	/*
96 	** Initialize, open and otherwise prepare for CD-ROM or Network access.
97 	*/
98 
99 	if ( !EntrezInit("SeqGet", FALSE, &is_network) ) {
100 		ErrPostEx(SEV_FATAL, 1,0, "Can't initialize Entrez");
101 		return 1;
102 	}
103 
104 	if (sip != NULL)
105 	{
106 		gi = EntrezFindSeqId(sip);
107 		if (! gi)
108 		{
109 			EntrezFini();
110 			SeqIdPrint(sip, tbuf, PRINTID_FASTA_SHORT);
111 			ErrPostEx(SEV_FATAL, 1,0, "Couldn't find SeqId [%s]", tbuf);
112 			return 1;
113 		}
114 		SeqIdFree(sip);
115 	}
116 
117 	sep = EntrezSeqEntryGet(gi, retcode);
118 	EntrezFini();
119 
120 	if (sep == NULL)
121 	{
122 		ErrPostEx(SEV_FATAL, 1,0,"Could not retrieve entry for GI %ld", (long)gi);
123 		return 1;
124 	}
125 
126 	outmode = "w";
127 	switch (outtype)
128 	{
129 		case 2:
130 			outmode = "wb";
131 		case 1:
132 			asnout = AsnIoOpen((CharPtr)myargs[0].strvalue, outmode);
133 			if (asnout == NULL)
134 			{
135 		         ErrPostEx(SEV_FATAL, 1,0, "Could not open [%s] for asn output",
136 		            myargs[0].strvalue);
137 		         return 1;
138 			}
139 			break;
140 		case 3:
141 		case 4:
142 		case 5:
143 			fp = FileOpen((CharPtr)myargs[0].strvalue, outmode);
144 			if (fp == NULL)
145 			{
146 		         ErrPostEx(SEV_FATAL, 1,0, "Could not open [%s] for asn output",
147 		            myargs[0].strvalue);
148 		         return 1;
149 			}
150 			break;
151 	}
152 
153 	switch (outtype)
154 	{
155 	    case 1:
156 		case 2:
157 			SeqEntryAsnWrite(sep, asnout, NULL);
158 			break;
159 		case 3:
160 			SeqEntryToFlat(sep, fp, GENBANK_FMT, RELEASE_MODE);
161 			break;
162 		case 4:
163 			SeqEntryToFlat(sep, fp, GENPEPT_FMT, RELEASE_MODE);
164 			break;
165 		case 5:
166 			SeqEntryToFasta(sep, fp, TRUE);  /* nuc acids */
167 			SeqEntryToFasta(sep, fp, FALSE); /* proteins */
168 			break;
169 	}
170 
171 
172 	AsnIoClose(asnout);
173 	FileClose(fp);
174 	SeqEntryFree(sep);
175 
176 	return 0;
177 }
178 
179 
180