1 /* asndhuff.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * RCS $Id: asndhuff.c,v 6.1 1998/06/12 19:27:02 kans Exp $
27 *
28 * Author: Greg Schuler
29 *
30 * Version Creation Date: 9/23/92
31 *
32 * File Description:
33 asndhuff -- decompresses a compressed ASN.1 (CASN) file.
34
35 A simple program to demonstrate the functions in casn.c
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date Name Description of modification
40 * -------- ---------- -----------------------------------------------------
41 * 03-22-95 Schuler Added cases for all current doc types
42 * 03-22-95 Schuler Use MonitorPtr instead of Handles for Monitors
43 *
44 * ==========================================================================
45 */
46
47 #include <casn.h>
48
49 #define REVISION_STR "$Revision: 6.1 $"
50
51 void do_medline PROTO((CASN_Handle casnh, AsnIoPtr aout));
52 void do_sequence PROTO((CASN_Handle casnh, AsnIoPtr aout));
53 void stream_medline PROTO((AsnIoPtr ain));
54 void stream_sequence PROTO((AsnIoPtr ain));
55
56 #define NUMARGS 2
57 Args myargs[NUMARGS] = {
58 { "Input file", NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0,0,NULL},
59 { "Output file", NULL, NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0,0,NULL}};
60
61
Main(void)62 Int2 Main (void)
63 {
64 CASN_Handle casnh;
65 AsnIoPtr aout, ain;
66
67 if (! GetArgs("AsnDHuff 1.0", NUMARGS, myargs))
68 return 1;
69
70 if (! SeqEntryLoad())
71 {
72 ErrShow();
73 return 1;
74 }
75
76 if ((casnh = CASN_Open(myargs[0].strvalue)) == NULL)
77 {
78 ErrShow();
79 return 1;
80 }
81
82 /*****************************************************************
83 *
84 * this part reads complete entries from compressed file and writes
85 * them to an uncompressed file
86 * if output file not given, it skips this step
87 ******************************************************************/
88
89 if (myargs[1].strvalue != NULL)
90 {
91 if ((aout = AsnIoOpen(myargs[1].strvalue,"wb")) ==NULL)
92 {
93 Message(MSG_FATAL, "Can't create %s\n", myargs[1].strvalue);
94 return 1;
95 }
96
97 switch (CASN_DocType(casnh))
98 {
99 case CASN_TypeMed : /* obsolete */
100 case CASN_Type_ml :
101 do_medline(casnh,aout);
102 break;
103
104 case CASN_TypeSeq : /* obsolete */
105 case CASN_Type_aa :
106 case CASN_Type_nt :
107 do_sequence(casnh,aout);
108 break;
109
110 default :
111 Message(MSG_ERROR,"Unknown document type");
112 break;
113 }
114
115 AsnIoClose(aout);
116
117 CASN_Close(casnh); /* close and reopen file */
118 if ((casnh = CASN_Open(myargs[0].strvalue)) == NULL)
119 {
120 ErrShow();
121 return 1;
122 }
123 }
124
125
126 /*************************************************************************
127 *
128 * This part processes the compressed file as a regular asn stream
129 * displaying title lines for sequence or medline records.
130 * if output file IS given, it skips this step
131 *
132 *************************************************************************/
133
134 if (myargs[1].strvalue == NULL)
135 {
136 ain = CASN_GetAsnIoPtr(casnh); /* get the stream pointer */
137
138 switch (CASN_DocType(casnh))
139 {
140 case CASN_TypeMed : /* obsolete */
141 case CASN_Type_ml :
142 stream_medline(ain);
143 break;
144
145 case CASN_TypeSeq : /* obsolete */
146 case CASN_Type_aa :
147 case CASN_Type_nt :
148 stream_sequence(ain);
149 break;
150
151 default :
152 Message(MSG_ERROR,"Unknown document type");
153 break;
154 }
155 }
156
157
158 CASN_Close(casnh);
159
160 return 0;
161 }
162
163
/*****************************************************************************
*
*   do_medline (casnh, aout)
*       Reads every MEDLINE entry available from the compressed file and
*       writes it to the output stream as an element of "Pub-set.medline".
*
*       casnh - handle of the open compressed file to read entries from
*       aout  - open ASN.1 output stream to write to
*
*       A progress monitor is updated after each entry.  If the number of
*       entries read differs from CASN_DocCount(casnh), an error is posted.
*       If any of the needed ASN.1 types cannot be found, an error message
*       is issued and nothing is written.
*
*****************************************************************************/
void do_medline (CASN_Handle casnh, AsnIoPtr aout)
{
	MedlineEntryPtr entry;
	DataVal val;
	Int4 total, count;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typePubSet = AsnTypeFind(amp,"Pub-set");
	AsnTypePtr typeMedline = AsnTypeFind(amp,"Pub-set.medline");
	AsnTypePtr typeMedlineE = AsnTypeFind(amp,"Pub-set.medline.E");
	Monitor *pmonitor;

	/* same guard as do_sequence: do not hand NULL types to the writer */
	if (typePubSet==NULL || typeMedline==NULL || typeMedlineE==NULL)
	{
		Message(MSG_ERROR, "one or more atp's is NULL\n");
		return;
	}

	total = CASN_DocCount(casnh);

	pmonitor = MonitorIntNew("MEDLINE Entries Decompressed", 0, total);

	/* AsnWrite receives &val, so give it a defined value instead of
	   whatever happened to be on the stack. */
	val.ptrvalue = NULL;
	AsnWrite(aout,typePubSet,&val);
	AsnStartStruct(aout,typeMedline);

	for (count=0; (entry=CASN_NextMedlineEntry(casnh)) != NULL; ++count)
	{
		MedlineEntryAsnWrite(entry,aout,typeMedlineE);
		MedlineEntryFree(entry);
		MonitorIntValue(pmonitor, count+1);
	}

	AsnEndStruct(aout,typeMedline);
	MonitorFree(pmonitor);

	/* Int4 is not necessarily long; cast so the arguments match %ld */
	if (count != total)
		ErrPost(1,1, "Only %ld of %ld records read", (long)count, (long)total);
	return;
}
195
196
/*****************************************************************************
*
*   do_sequence (casnh, aout)
*       Reads every sequence entry available from the compressed file and
*       writes it to the output stream as an element of "Bioseq-set.seq-set".
*
*       casnh - handle of the open compressed file to read entries from
*       aout  - open ASN.1 output stream to write to
*
*       A progress monitor is updated after each entry.  If the number of
*       entries read differs from CASN_DocCount(casnh), an error is posted.
*       If any of the needed ASN.1 types cannot be found, an error message
*       is issued and nothing is written.
*
*****************************************************************************/
void do_sequence (CASN_Handle casnh, AsnIoPtr aout)
{
	SeqEntryPtr entry;
	Int4 total, count;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeSet = AsnTypeFind(amp,"Bioseq-set");
	AsnTypePtr typeSetSet = AsnTypeFind(amp,"Bioseq-set.seq-set");
	AsnTypePtr typeSetSetE = AsnTypeFind(amp,"Bioseq-set.seq-set.E");
	Monitor *pmonitor;

	if (typeSet==NULL || typeSetSet==NULL || typeSetSetE==NULL)
	{
		Message(MSG_ERROR, "one or more atp's is NULL\n");
		return;
	}

	total = CASN_DocCount(casnh);

	pmonitor = MonitorIntNew("SEQUENCE Entries Decompressed", 0, total);

	/* open the enclosing Bioseq-set and its seq-set member */
	AsnStartStruct(aout,typeSet);
	AsnStartStruct(aout,typeSetSet);

	for (count=0; (entry=CASN_NextSeqEntry(casnh)) != NULL; ++count)
	{
		SeqEntryAsnWrite(entry,aout,typeSetSetE);
		SeqEntryFree(entry);
		MonitorIntValue(pmonitor, count+1);
	}

	/* close in the reverse order of opening */
	AsnEndStruct(aout,typeSetSet);
	AsnEndStruct(aout,typeSet);

	MonitorFree(pmonitor);

	/* Int4 is not necessarily long; cast so the arguments match %ld */
	if (count != total)
		ErrPost(1,1, "Only %ld of %ld records read", (long)count, (long)total);
	return;
}
237
/*****************************************************************************
*
*   stream_medline (ain)
*       Walks the ASN.1 stream `ain`, starting from type "Medline-entry",
*       and shows the value of every "Title.E.name" item in a string
*       monitor.  All other items are read and discarded.
*
*       ain - ASN.1 input stream to read from
*
*       Reading stops when AsnReadId returns NULL.
*
*****************************************************************************/
void stream_medline (AsnIoPtr ain)
{
	AsnTypePtr atp;
	DataVal val;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeMedlineEntry = AsnTypeFind(amp,"Medline-entry");
	AsnTypePtr title = AsnTypeFind(amp,"Title.E.name");
	Monitor *pmonitor;

	pmonitor = MonitorStrNew("MEDLINE Titles Decompressed", 60);

	atp = typeMedlineEntry;
	while ((atp = AsnReadId(ain, amp, atp)) != NULL)
	{
		if (atp == title)
		{
			/* Start from a known value so that a failed read can never
			   leave an indeterminate pointer to be displayed or freed. */
			val.ptrvalue = NULL;
			if (AsnReadVal(ain, atp, &val) > 0 && val.ptrvalue != NULL)
				MonitorStrValue(pmonitor, (CharPtr)val.ptrvalue);
			if (val.ptrvalue != NULL)
				MemFree(val.ptrvalue);
		}
		else
			AsnReadVal(ain, atp, NULL);   /* not a title: skip the value */
	}
	MonitorFree(pmonitor);
	return;
}
264
/*****************************************************************************
*
*   stream_sequence (ain)
*       Walks the ASN.1 stream `ain`, starting from type "Seq-entry", and
*       shows the value of every "Seq-descr.E.title" item in a string
*       monitor.  All other items are read and discarded.
*
*       ain - ASN.1 input stream to read from
*
*       Whenever AsnGetLevel(ain) reports level 0 a complete Seq-entry has
*       been consumed, so the expected type is reset to "Seq-entry" before
*       the next AsnReadId call.  Reading stops when AsnReadId returns NULL.
*
*****************************************************************************/
void stream_sequence (AsnIoPtr ain)
{
	AsnTypePtr atp;
	DataVal val;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeSeqEntry = AsnTypeFind(amp,"Seq-entry");
	AsnTypePtr title = AsnTypeFind(amp,"Seq-descr.E.title");
	Monitor *pmonitor;

	pmonitor = MonitorStrNew("Sequence Titles Decompressed", 60);

	atp = typeSeqEntry;
	while ((atp = AsnReadId(ain, amp, atp)) != NULL)
	{
		if (atp == title)
		{
			/* Start from a known value so that a failed read can never
			   leave an indeterminate pointer to be displayed or freed. */
			val.ptrvalue = NULL;
			if (AsnReadVal(ain, atp, &val) > 0 && val.ptrvalue != NULL)
				MonitorStrValue(pmonitor, (CharPtr)val.ptrvalue);
			if (val.ptrvalue != NULL)
				MemFree(val.ptrvalue);
		}
		else
			AsnReadVal(ain, atp, NULL);   /* not a title: skip the value */

		if (! AsnGetLevel(ain))       /* finished reading a Seq-entry */
			atp = typeSeqEntry;       /* reset to start next one */
	}
	MonitorFree(pmonitor);
	return;
}
293
294
295
296