1 /* asndhuff.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * RCS $Id: asndhuff.c,v 6.1 1998/06/12 19:27:02 kans Exp $
27 *
28 * Author:  Greg Schuler
29 *
30 * Version Creation Date: 9/23/92
31 *
32 * File Description:
	asndhuff  --  decompresses a compressed ASN.1 (CASN) file.

	A simple program to demonstrate the functions in casn.c
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date      Name        Description of modification
40 * --------  ----------  -----------------------------------------------------
41 * 03-22-95  Schuler     Added cases for all current doc types
42 * 03-22-95  Schuler     Use MonitorPtr instead of Handles for Monitors
43 *
44 * ==========================================================================
45 */
46 
47 #include <casn.h>
48 
49 #define REVISION_STR "$Revision: 6.1 $"
50 
/*
 * Forward declarations for the helpers defined after Main().
 *
 *   do_*      read complete entries through a CASN handle and write them
 *             to an AsnIo output stream.
 *   stream_*  walk an AsnIo input stream and display the title lines found.
 */
void do_medline PROTO((CASN_Handle casnh, AsnIoPtr aout));
void do_sequence PROTO((CASN_Handle casnh, AsnIoPtr aout));
void stream_medline PROTO((AsnIoPtr ain));
void stream_sequence PROTO((AsnIoPtr ain));
55 
56 #define NUMARGS 2
57 Args myargs[NUMARGS] = {
58 	{ "Input file", NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0,0,NULL},
59 	{ "Output file", NULL, NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0,0,NULL}};
60 
61 
Main(void)62 Int2 Main (void)
63 {
64 	CASN_Handle casnh;
65 	AsnIoPtr     aout, ain;
66 
67 	if (! GetArgs("AsnDHuff 1.0", NUMARGS, myargs))
68 		return 1;
69 
70 	if (! SeqEntryLoad())
71 	{
72 		ErrShow();
73 		return 1;
74 	}
75 
76 	if ((casnh = CASN_Open(myargs[0].strvalue)) == NULL)
77 	{
78 		ErrShow();
79 		return 1;
80 	}
81 
82 	/*****************************************************************
83 	*
84 	*   this part reads complete entries from compressed file and writes
85 	*   them to an uncompressed file
86 	*     if output file not given, it skips this step
87 	******************************************************************/
88 
89 	if (myargs[1].strvalue != NULL)
90 	{
91 		if ((aout = AsnIoOpen(myargs[1].strvalue,"wb")) ==NULL)
92 		{
93 			Message(MSG_FATAL, "Can't create %s\n", myargs[1].strvalue);
94 			return 1;
95 		}
96 
97 		switch (CASN_DocType(casnh))
98 		{
99 			case CASN_TypeMed :  /* obsolete */
100 			case CASN_Type_ml :
101 				do_medline(casnh,aout);
102 				break;
103 
104 			case CASN_TypeSeq :  /* obsolete */
105 			case CASN_Type_aa :
106 			case CASN_Type_nt :
107 				do_sequence(casnh,aout);
108 				break;
109 
110 			default :
111 				Message(MSG_ERROR,"Unknown document type");
112 				break;
113 		}
114 
115 		AsnIoClose(aout);
116 
117 		CASN_Close(casnh);     /* close and reopen file */
118 		if ((casnh = CASN_Open(myargs[0].strvalue)) == NULL)
119 		{
120 			ErrShow();
121 			return 1;
122 		}
123 	}
124 
125 
126 	/*************************************************************************
127 	*
128 	*   This part processes the compressed file as a regular asn stream
129 	*   displaying title lines for sequence or medline records.
130 	*     if output file IS given, it skips this step
131 	*
132 	*************************************************************************/
133 
134 	if (myargs[1].strvalue == NULL)
135 	{
136 		ain = CASN_GetAsnIoPtr(casnh);    /* get the stream pointer */
137 
138 		switch (CASN_DocType(casnh))
139 		{
140 			case CASN_TypeMed :  /* obsolete */
141 			case CASN_Type_ml :
142 				stream_medline(ain);
143 				break;
144 
145 			case CASN_TypeSeq :  /* obsolete */
146 			case CASN_Type_aa :
147 			case CASN_Type_nt :
148 				stream_sequence(ain);
149 				break;
150 
151 			default :
152 				Message(MSG_ERROR,"Unknown document type");
153 				break;
154 		}
155 	}
156 
157 
158 	CASN_Close(casnh);
159 
160 	return 0;
161 }
162 
163 
/*****************************************************************************
*
*   do_medline(casnh, aout)
*     Writes every entry returned by CASN_NextMedlineEntry(casnh) to aout.
*     The output is opened as a "Pub-set", the entries are wrapped in a
*     "Pub-set.medline" structure and each one is written with the
*     "Pub-set.medline.E" type.
*
*     casnh : handle of the compressed file to read from
*     aout  : AsnIo stream to write to
*
*     A monitor shows progress against CASN_DocCount(casnh).  If the number
*     of entries read differs from that count an error is posted.  If any of
*     the three ASN.1 types cannot be found, an error message is issued and
*     nothing is written (same policy as do_sequence).
*
*****************************************************************************/
void do_medline (CASN_Handle casnh, AsnIoPtr aout)
{
	MedlineEntryPtr entry;
	DataVal val;
	Int4 total, count;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typePubSet = AsnTypeFind(amp,"Pub-set");
	AsnTypePtr typeMedline = AsnTypeFind(amp,"Pub-set.medline");
	AsnTypePtr typeMedlineE = AsnTypeFind(amp,"Pub-set.medline.E");
	Monitor *pmonitor;

	if (typePubSet==NULL || typeMedline==NULL || typeMedlineE==NULL)
	{
		Message(MSG_ERROR, "one or more atp's is NULL\n");
		return;
	}

	total = CASN_DocCount(casnh);

	pmonitor = MonitorIntNew("MEDLINE Entries Decompressed", 0, total);

	/* give AsnWrite a defined value instead of an indeterminate one */
	val.ptrvalue = NULL;
	AsnWrite(aout,typePubSet,&val);
	AsnStartStruct(aout,typeMedline);

	for (count=0; (entry=CASN_NextMedlineEntry(casnh)) != NULL; ++count)
	{
		MedlineEntryAsnWrite(entry,aout,typeMedlineE);
		MedlineEntryFree(entry);
		MonitorIntValue(pmonitor, count+1);
	}

	AsnEndStruct(aout,typeMedline);
	MonitorFree(pmonitor);

	/* %ld requires long; Int4 is not guaranteed to be long, so cast */
	if (count != total)
		ErrPost(1,1, "Only %ld of %ld records read", (long)count, (long)total);
	return;
}
195 
196 
/*****************************************************************************
*
*   do_sequence(casnh, aout)
*     Writes every entry returned by CASN_NextSeqEntry(casnh) to aout.
*     The entries are wrapped in a "Bioseq-set" / "Bioseq-set.seq-set"
*     structure and each one is written with the "Bioseq-set.seq-set.E"
*     type.
*
*     casnh : handle of the compressed file to read from
*     aout  : AsnIo stream to write to
*
*     A monitor shows progress against CASN_DocCount(casnh).  If the number
*     of entries read differs from that count an error is posted.  If any of
*     the three ASN.1 types cannot be found, an error message is issued and
*     nothing is written.
*
*****************************************************************************/
void do_sequence (CASN_Handle casnh, AsnIoPtr aout)
{
	SeqEntryPtr entry;
	Int4 total, count;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeSet = AsnTypeFind(amp,"Bioseq-set");
	AsnTypePtr typeSetSet = AsnTypeFind(amp,"Bioseq-set.seq-set");
	AsnTypePtr typeSetSetE = AsnTypeFind(amp,"Bioseq-set.seq-set.E");
	Monitor *pmonitor;

	if (typeSet==NULL || typeSetSet==NULL || typeSetSetE==NULL)
	{
		Message(MSG_ERROR, "one or more atp's is NULL\n");
		return;
	}

	total = CASN_DocCount(casnh);

	pmonitor = MonitorIntNew("SEQUENCE Entries Decompressed", 0, total);

	AsnStartStruct(aout,typeSet);
	AsnStartStruct(aout,typeSetSet);

	for (count=0; (entry=CASN_NextSeqEntry(casnh)) != NULL; ++count)
	{
		SeqEntryAsnWrite(entry,aout,typeSetSetE);
		SeqEntryFree(entry);
		MonitorIntValue(pmonitor, count+1);
	}

	AsnEndStruct(aout,typeSetSet);
	AsnEndStruct(aout,typeSet);

	MonitorFree(pmonitor);

	/* %ld requires long; Int4 is not guaranteed to be long, so cast */
	if (count != total)
		ErrPost(1,1, "Only %ld of %ld records read", (long)count, (long)total);
	return;
}
237 
/*****************************************************************************
*
*   stream_medline(ain)
*     Reads ain element by element, starting from the "Medline-entry" type,
*     until AsnReadId() returns NULL.  The value of every "Title.E.name"
*     element is shown in a string monitor and then freed; every other
*     element is read and discarded.
*
*     ain : AsnIo stream to read from
*
*     The loop stops early if a title value cannot be read, so that an
*     unset value is never displayed or freed.
*     NOTE(review): this relies on AsnReadVal() returning <= 0 on failure,
*     the convention used across the NCBI toolkit -- confirm against asn.h.
*
*****************************************************************************/
void stream_medline (AsnIoPtr ain)
{
	AsnTypePtr atp;
	DataVal val;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeMedlineEntry = AsnTypeFind(amp,"Medline-entry");
	AsnTypePtr title = AsnTypeFind(amp,"Title.E.name");
	Monitor *pmonitor;

	pmonitor = MonitorStrNew("MEDLINE Titles Decompressed", 60);

	atp = typeMedlineEntry;
	while ((atp = AsnReadId(ain, amp, atp)) != NULL)
	{
		if (atp == title)
		{
			val.ptrvalue = NULL;
			if (AsnReadVal(ain, atp, &val) <= 0)
				break;        /* read failed: val holds nothing usable */
			MonitorStrValue(pmonitor, (CharPtr)val.ptrvalue);
			MemFree(val.ptrvalue);
		}
		else
			AsnReadVal(ain, atp, NULL);   /* skip over the value */
	}
	MonitorFree(pmonitor);
	return;
}
264 
/*****************************************************************************
*
*   stream_sequence(ain)
*     Reads ain element by element, starting from the "Seq-entry" type,
*     until AsnReadId() returns NULL.  The value of every
*     "Seq-descr.E.title" element is shown in a string monitor and then
*     freed; every other element is read and discarded.  Whenever
*     AsnGetLevel(ain) drops to zero the expected type is reset to
*     "Seq-entry" so that the next entry can be read.
*
*     ain : AsnIo stream to read from
*
*     The loop stops early if a title value cannot be read, so that an
*     unset value is never displayed or freed.
*     NOTE(review): this relies on AsnReadVal() returning <= 0 on failure,
*     the convention used across the NCBI toolkit -- confirm against asn.h.
*
*****************************************************************************/
void stream_sequence (AsnIoPtr ain)
{
	AsnTypePtr atp;
	DataVal val;
	AsnModulePtr amp = AsnAllModPtr();
	AsnTypePtr typeSeqEntry = AsnTypeFind(amp,"Seq-entry");
	AsnTypePtr title = AsnTypeFind(amp,"Seq-descr.E.title");
	Monitor *pmonitor;

	pmonitor = MonitorStrNew("Sequence Titles Decompressed", 60);

	atp = typeSeqEntry;
	while ((atp = AsnReadId(ain, amp, atp)) != NULL)
	{
		if (atp == title)
		{
			val.ptrvalue = NULL;
			if (AsnReadVal(ain, atp, &val) <= 0)
				break;        /* read failed: val holds nothing usable */
			MonitorStrValue(pmonitor, (CharPtr)val.ptrvalue);
			MemFree(val.ptrvalue);
		}
		else
			AsnReadVal(ain, atp, NULL);   /* skip over the value */
		if (! AsnGetLevel(ain))       /* finished reading a Seq-entry */
			atp = typeSeqEntry;		  /* reset to start next one */
	}
	MonitorFree(pmonitor);
	return;
}
293 
294 
295 
296