1 /*****************************************************************************
2 *
3 *   testval.c
4 *   	check translations
5 *       check for stop codons
6 *       Check for and fix non 3.0 asn spec things
7 *
8 *****************************************************************************/
9 #include <objsub.h>
10 #include <valid.h>
11 
12 #define NUMARG 15
13 Args myargs[NUMARG] = {
14 	{"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'i',ARG_FILE_IN,0.0,0,NULL},
15 	{"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
16 	{"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
17 	{"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
18 	{"Filename for error messages","stderr",NULL,NULL,TRUE,'x',ARG_FILE_OUT,0.0,0,NULL} ,
19 	{"Filename for asn.1 output",NULL,NULL,NULL,TRUE,'o',ARG_FILE_OUT,0.0,0,NULL},
20 	{"Output asnfile in binary mode","F",NULL,NULL,TRUE,'t',ARG_BOOLEAN,0.0,0,NULL},
21 	{"Severity of error for count in return code","4","0","4",FALSE,'r',ARG_INT,0.0,0,NULL},
22 	{"Continue on ASN.1 error?","F",NULL,NULL,TRUE,'c',ARG_BOOLEAN,0.0,0,NULL},
23 	{"Patch invalid sequence residues?","F",NULL,NULL,TRUE,'p',ARG_BOOLEAN,0.0,0,NULL},
24 	{"Lowest severity of error to show","3","0","4",FALSE,'q',ARG_INT,0.0,0,NULL},
25 	{"ASN.1 spec level to filter","0","0","3",FALSE,'l',ARG_INT,0.0,0,NULL},
26 	{"Use SeqMgr indexing?","T",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
27 	{"Validate alignments?","F",NULL,NULL,TRUE,'a',ARG_BOOLEAN,0.0,0,NULL},
28 	{"Require ISO-JTA?","F",NULL,NULL,TRUE,'j',ARG_BOOLEAN,0.0,0,NULL}};
29 
30 CharPtr AsnIoGets PROTO((AsnIoPtr aip));  /* from asnio.h */
31 void LIBCALLBACK error_ret PROTO((Int2 value, CharPtr msg));
32 
33 
Main(void)34 Int2 Main(void)
35 {
36 	AsnIoPtr aip, aipout=NULL;
37 	SeqEntryPtr sep;
38 	AsnTypePtr atp, atp2;
39 	AsnModulePtr amp;
40 	ValidStructPtr vsp;
41 	Int2 numerrors, found_one, fatal_error = 0, error_level, i, spec_version;
42 	CharPtr tmp;
43 	Boolean found;
44 	DataVal av;
45 
46 					/* check command line arguments */
47 	if ( ! GetArgs("testval",NUMARG, myargs))
48 		return 1;
49 
50 					/* load the sequence alphabets  */
51 					/* (and sequence parse trees)   */
52 	if (! SeqEntryLoad())
53 		ErrShow();
54 				/* Don't let the program die, no matter what */
55 	ErrSetFatalLevel(SEV_MAX);
56 	ErrSetMessageLevel(SEV_MAX);     /* don't report errors via Message()*/
57 	ErrSetOptFlags (EO_MSG_CODES);
58 	ErrSetLogfile(myargs[4].strvalue, 0);      /* report errors by printing to stderr */
59 
60 	ErrSetOpts (ERR_IGNORE, ERR_LOG_ON);
61 
62 
63 				    /* get pointer to all loaded ASN.1 modules */
64 	amp = AsnAllModPtr();
65 	if (amp == NULL)
66 		ErrShow();
67 
68 	if (myargs[2].intvalue)  /* Seq-submit */
69 	{
70 	    if (! SubmitAsnLoad())
71     	    ErrPostEx(SEV_FATAL, 1,0, "Unable to SeqSubmitLoad.");
72 
73 		atp = AsnFind("Seq-submit");    /* get the initial type pointers */
74 		if (atp == NULL)
75 			ErrShow();
76 		atp2 = AsnFind("Seq-submit.data.entrys.E");
77 		if (atp2 == NULL)
78 			ErrShow();
79 	}
80 	else if (! myargs[1].intvalue)   /* not Seq-entry */
81 	{
82 		atp = AsnFind("Bioseq-set");    /* get the initial type pointers */
83 		if (atp == NULL)
84 			ErrShow();
85 		atp2 = AsnFind("Bioseq-set.seq-set.E");
86 		if (atp2 == NULL)
87 			ErrShow();
88 	}
89 
90 	error_level = (Int2)myargs[7].intvalue;
91 	spec_version = (Int2)myargs[11].intvalue;
92 
93 					/* open the ASN.1 input file in the right mode */
94 	if ((aip = AsnIoOpen (myargs[0].strvalue, myargs[3].intvalue?"rb":"r"))
95           == NULL)
96 	{
97 		ErrShow();
98 		return 1;
99 	}
100 	aip->spec_version = spec_version;
101 
102 
103 					/* open the ASN.1 output file in the right mode */
104 	if (myargs[5].strvalue != NULL)
105 	{
106 		if ((aipout = AsnIoOpen (myargs[5].strvalue, myargs[6].intvalue?"wb":"w"))
107     	      == NULL)
108 		{
109 			ErrShow();
110 			return 1;
111 		}
112 	}
113 
114 	if ((! myargs[3].intvalue) && (myargs[2].intvalue))
115 	{
116 									/* scan past any headers */
117 		found = FALSE;
118 		while (! found)
119 		{
120 			tmp = AsnIoGets(aip);
121 			if (tmp == NULL) {
122 			  printf("Unable to read file\n");
123 			  return 0;
124 			}
125 			*(aip->buf + aip->offset -1) = '\0';
126 			if (strstr(tmp, "Seq-submit") != NULL)
127 			{
128 				found = TRUE;
129 			}
130 			*(aip->buf + aip->offset - 1) = '\n';
131 		}
132 
133 	}
134 
135 	vsp = ValidStructNew();
136 
137 	vsp->cutoff = (Int2)(myargs[10].intvalue);
138 	vsp->useSeqMgrIndexes = (Boolean)(myargs[12].intvalue); /* indexed validate */
139 	vsp->validateAlignments = (Boolean)(myargs[13].intvalue);
140 	vsp->farIDsInAlignments = (Boolean)(myargs[13].intvalue);
141 	vsp->alwaysRequireIsoJTA = (Boolean)(myargs[14].intvalue);
142 
143 	if (myargs[8].intvalue)   /* continue on ASN.1 error */
144 		AsnIoSetErrorMsg(aip, error_ret);
145 
146 	if (myargs[9].intvalue)   /* patch bad sequence residues */
147 		vsp->patch_seq = TRUE;
148 
149 	numerrors = 0;
150 	found_one = FALSE;
151 	if ( myargs[1].intvalue)   /* read one Seq-entry */
152 	{
153 		sep = SeqEntryAsnRead(aip, NULL);
154 
155 		if (sep == NULL)
156 		{
157 			fatal_error++;
158 			numerrors++;
159 			ErrPostEx(SEV_ERROR,0,0, "Couldn't load [%s]\n", myargs[0].strvalue);
160 		}
161 		else
162 		{
163 			found_one = TRUE;
164 			if (aip->io_failure)
165 			{
166 				vsp->non_ascii_chars = TRUE;
167 				aip->io_failure = FALSE;
168 			}
169 			ValidateSeqEntry(sep, vsp);
170 			for (i = 0; i <= 4 ; i++)
171 			{
172 				numerrors += vsp->errors[i];
173 				if (i >= error_level)
174 					fatal_error += vsp->errors[i];
175 			}
176 			if (aipout != NULL)
177 				SeqEntryAsnWrite(sep, aipout, NULL);
178 			SeqEntryFree(sep);
179 		}
180 	}
181 	else                      /* read Seq-entry's from a Bioseq-set */
182 	{
183 		while ((atp = AsnReadId(aip, amp, atp)) != NULL)
184 		{
185 			found_one = TRUE;
186 			if (atp == atp2)    /* top level Seq-entry */
187 			{
188 				sep = SeqEntryAsnRead(aip, atp);
189 				if (sep == NULL)
190 				{
191 					numerrors++;
192 					fatal_error++;
193 				}
194 				if (aip->io_failure)
195 				{
196 					vsp->non_ascii_chars = TRUE;
197 					aip->io_failure = FALSE;
198 				}
199 				ValidateSeqEntry(sep, vsp);
200 				if (aipout != NULL)
201 					SeqEntryAsnWrite(sep, aipout, atp);
202 				SeqEntryFree(sep);
203 				for (i = 0; i <=4; i++)
204 				{
205 					numerrors += vsp->errors[i];
206 					if (i >= error_level)
207 						fatal_error += vsp->errors[i];
208 				}
209 				ValidStructClear(vsp);
210 			}
211 			else
212 			{
213 				AsnReadVal(aip, atp, &av);
214 				if (aipout != NULL)
215 					AsnWrite(aipout, atp, &av);
216 				AsnKillValue(atp, &av);
217 			}
218 		}
219 	}
220 
221 	AsnIoClose(aip);
222 	AsnIoClose(aipout);
223 
224 	ValidStructFree(vsp);
225 
226 	if (myargs[5].strvalue == NULL)   /* no output file */
227 	{
228 		if (! numerrors)
229 		{
230 			if (! found_one) {
231 				printf("Unable to read file\n");
232 			} else {
233 				printf("All entries are OK!\n");
234 			}
235 		}
236 		else
237 			printf("%d messges reported\n", (int)numerrors);
238 	}
239 
240 
241 	return( fatal_error );
242 }
243 
error_ret(Int2 level,CharPtr msg)244 void LIBCALLBACK error_ret (Int2 level, CharPtr msg)
245 {
246 	fprintf(stderr, "%s\n\n", msg);
247 	return;
248 }
249