1 /* gbparint.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: gbparint.c
27 *
28 * Author: Karl Sirotkin
29 *
30 * $Log: gbparint.c,v $
31 * Revision 6.10 2014/08/01 17:14:01 bazhin
32 * Added support for new format (4+2+S+{6|7|8}) WGS scaffolds.
33 *
34 * Revision 6.9 2009/10/02 19:46:00 kans
35 * address clang static analyzer warnings
36 *
37 * Revision 6.8 2004/07/22 16:08:35 bazhin
38 * Changes to parse gaps of unknown lengths (like "gap(unk100)")
39 * within location strings.
40 *
41 * Revision 6.7 2004/03/03 17:32:19 kans
42 * Nlm_gbparselex checks against NULL input
43 *
44 * Revision 6.6 2003/12/05 16:42:11 bazhin
45 * Nlm_gbparselex() and Nlm_gbparselex_ver() functions now can handle
46 * RefSeq and WGS accessions.
47 *
48 * Revision 6.5 2001/06/07 17:00:54 tatiana
49 * added gi option in Nlm_gbparselex()
50 *
51 * Revision 6.4 2000/03/20 23:38:39 aleksey
52 * Finally submitted the changes which have been made by serge bazhin
53 * and been kept in my local directory.
54 *
55 * These changes allow to establish user callback functions
56 * in 'Asn2ffJobPtr' structure which are called within
57 * 'SeqEntryToFlatAjp' function call.
58 * The new members are:
59 * user_data - pointer to a user context for passing data
60 * ajp_count_index - user defined function
61 * ajp_print_data - user defined function
62 * ajp_print_index - user defined function
63 *
64 * Revision 6.3 1999/04/06 19:42:55 bazhin
65 * Changes, related to flat2asn's ACCESSION.VERSION parsing.
66 *
67 * Revision 6.2 1999/04/02 21:15:07 tatiana
68 * accession.version added
69 *
70 * Revision 6.1 1997/10/24 21:28:39 bazhin
71 * Is able to distinguish and process "gap(...)" tokens inside
72 * of location entries. Made for CONTIG line join contents.
73 *
74 * Revision 6.0 1997/08/25 18:06:05 madden
75 * Revision changed to 6.0
76 *
77 * Revision 5.3 1997/06/19 18:38:01 vakatov
78 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
79 *
80 * Revision 5.2 1997/02/06 00:16:14 tatiana
81 * dealing with 2+6 accession
82 *
83 * Revision 5.1 1997/01/27 19:16:17 tatiana
84 * accept two-letter prefix in accession number
85 *
86 * Revision 5.0 1996/05/28 13:23:23 ostell
87 * Set to revision 5.0
88 *
89 * Revision 4.2 1996/05/21 21:12:05 tatiana
90 * bullet proof in gbparseint()
91 *
92 * Revision 4.1 1995/07/31 19:02:10 tatiana
93 * fix seq_id->choice
94 *
95 * Revision 1.8 1995/05/15 21:46:05 ostell
96 * added Log line
97 *
98 *
99 *
100 */
101
102 #include "parsegb.h"
103 #include "gbparlex.h"
104 #include "errdefn.h"
105 #include <sequtil.h>
106 #include <edutil.h>
107
108 #define TAKE_FIRST 1
109 #define TAKE_SECOND 2
110
111 void Nlm_gbgap PROTO((ValNodePtr PNTR currentPt, ValNodePtr PNTR retval,
112 Boolean unknown));
113
114 /*--------- do_Nlm_gbparse_error () ---------------*/
115
116 NLM_EXTERN void
do_Nlm_gbparse_error(CharPtr msg,CharPtr details)117 do_Nlm_gbparse_error (CharPtr msg, CharPtr details)
118 {
119 Int4 len = StringLen(msg) +7;
120 CharPtr errmsg, temp;
121
122 len += StringLen(details);
123 temp = errmsg= MemNew((size_t)len);
124 temp = StringMove(temp, msg);
125 temp = StringMove(temp, " at ");
126 temp = StringMove(temp, details);
127
128 ErrPostStr(SEV_ERROR, ERR_FEATURE_LocationParsing, errmsg);
129
130 MemFree(errmsg);
131 }
132 #define MAKE_THREAD_SAFE
133 #ifndef MAKE_THREAD_SAFE
134 static Nlm_gbparse_errfunc Err_func = do_Nlm_gbparse_error;
135 static Nlm_gbparse_rangefunc Range_func = NULL;
136 static Pointer Nlm_gbparse_range_data = NULL;
137 #define MACRO_THREAD_SAVE_STATIC
138
139 #else
140
141 #include <ncbithr.h>
142
143 static TNlmTls Err_func_tls=NULL;
144 static TNlmTls Range_func_tls=NULL;
145 static TNlmTls Nlm_gbparse_range_data_tls=NULL;
146
147 #define MACRO_THREAD_SAVE_STATIC \
148 Nlm_gbparse_errfunc Err_func = NULL; \
149 Nlm_gbparse_rangefunc Range_func = NULL; \
150 Pointer Nlm_gbparse_range_data = NULL; \
151 if(Err_func_tls) NlmTlsGetValue(Err_func_tls,(VoidPtr PNTR)&Err_func); \
152 if(!Err_func) Err_func = do_Nlm_gbparse_error; \
153 if(Range_func_tls) NlmTlsGetValue(Range_func_tls,(VoidPtr PNTR)&Range_func); \
154 if(Nlm_gbparse_range_data_tls) NlmTlsGetValue(Nlm_gbparse_range_data_tls,(VoidPtr PNTR)&Nlm_gbparse_range_data);
155 #endif
156
157 /*------------------ Nlm_gbcheck_range()-------------*/
158 static void
Nlm_gbcheck_range(Int4 num,SeqIdPtr idp,Boolean PNTR keep_rawPt,int PNTR num_errsPt,ValNodePtr head,ValNodePtr current)159 Nlm_gbcheck_range(Int4 num, SeqIdPtr idp, Boolean PNTR keep_rawPt, int PNTR num_errsPt, ValNodePtr head, ValNodePtr current)
160 {
161 Int4 len;
162 MACRO_THREAD_SAVE_STATIC;
163 if (Range_func != NULL){
164 len = (*Range_func)(Nlm_gbparse_range_data, idp);
165 if (len > 0)
166 if (num <0 || num >= len){
167 Nlm_gbparse_error("range error", head, current);
168 * keep_rawPt = TRUE;
169 (*num_errsPt) ++;
170 }
171 }
172 }
173
174 /*----------- Nlm_install_gbparse_error_handler ()-------------*/
175
176 NLM_EXTERN void
Nlm_install_gbparse_error_handler(Nlm_gbparse_errfunc new_func)177 Nlm_install_gbparse_error_handler(Nlm_gbparse_errfunc new_func)
178 {
179 #ifdef MAKE_THREAD_SAFE
180 NlmTlsSetValue(&Err_func_tls, (VoidPtr PNTR) new_func, NULL);
181 #else
182 Err_func = new_func;
183 #endif
184 }
185
186 /*----------- Nlm_install_gbparse_range_func ()-------------*/
187
188 NLM_EXTERN void
Nlm_install_gbparse_range_func(Pointer data,Nlm_gbparse_rangefunc new_func)189 Nlm_install_gbparse_range_func(Pointer data, Nlm_gbparse_rangefunc new_func)
190 {
191 #ifdef MAKE_THREAD_SAFE
192 NlmTlsSetValue(&Range_func_tls, (VoidPtr PNTR) new_func,NULL);
193 NlmTlsSetValue(&Nlm_gbparse_range_data_tls,data,NULL);
194 #else
195 Range_func = new_func;
196 Nlm_gbparse_range_data = data;
197 #endif
198
199 }
200
201 /*--------- Nlm_gbparse_error()-----------*/
202
203 NLM_EXTERN void
Nlm_gbparse_error(CharPtr front,ValNodePtr head,ValNodePtr current)204 Nlm_gbparse_error(CharPtr front, ValNodePtr head, ValNodePtr current)
205 {
206 CharPtr details;
207
208 MACRO_THREAD_SAVE_STATIC;
209
210 details = Nlm_gbparse_point (head, current);
211 Err_func (front,details);
212 MemFree(details);
213 }
214
215 /*------ Nlm_gbparse_point ()----*/
216
217 NLM_EXTERN CharPtr
Nlm_gbparse_point(ValNodePtr head,ValNodePtr current)218 Nlm_gbparse_point (ValNodePtr head, ValNodePtr current)
219 {
220 CharPtr temp, retval = NULL;
221 int len = 0;
222 ValNodePtr now;
223
224 for ( now = head; now ; now = now -> next){
225 switch ( now-> choice){
226 case GBPARSE_INT_JOIN :
227 len += 4;
228 break;
229 case GBPARSE_INT_COMPL :
230 len += 10;
231 break;
232 case GBPARSE_INT_LEFT :
233 case GBPARSE_INT_RIGHT :
234 case GBPARSE_INT_CARET :
235 case GBPARSE_INT_GT :
236 case GBPARSE_INT_LT :
237 case GBPARSE_INT_COMMA :
238 case GBPARSE_INT_SINGLE_DOT :
239 len ++;
240 break;
241 case GBPARSE_INT_DOT_DOT :
242 len += 2;
243 break;
244 case GBPARSE_INT_ACCESION :
245 case GBPARSE_INT_NUMBER :
246 len += StringLen ( now -> data.ptrvalue);
247 break;
248 case GBPARSE_INT_ORDER :
249 case GBPARSE_INT_GROUP :
250 len += 5;
251 break;
252 case GBPARSE_INT_ONE_OF :
253 case GBPARSE_INT_ONE_OF_NUM:
254 len += 6;
255 break;
256 case GBPARSE_INT_REPLACE :
257 len += 7;
258 break;
259 case GBPARSE_INT_STRING:
260 len += StringLen(now ->data.ptrvalue) + 1;
261 break;
262 case GBPARSE_INT_UNKNOWN :
263 default:
264 break;
265 }
266 len ++; /* for space */
267
268
269 if ( now == current)
270 break;
271 }
272
273
274 if (len > 0){
275 temp = retval = MemNew(len+1);
276 for ( now = head; now ; now = now -> next){
277 switch ( now-> choice){
278 case GBPARSE_INT_JOIN :
279 temp = StringMove(temp,"join");
280 break;
281 case GBPARSE_INT_COMPL :
282 temp = StringMove(temp,"complement");
283 break;
284 case GBPARSE_INT_LEFT :
285 temp = StringMove(temp,"(");
286 break;
287 case GBPARSE_INT_RIGHT :
288 temp = StringMove(temp,")");
289 break;
290 case GBPARSE_INT_CARET :
291 temp = StringMove(temp,"^");
292 break;
293 case GBPARSE_INT_DOT_DOT :
294 temp = StringMove(temp,"..");
295 break;
296 case GBPARSE_INT_ACCESION :
297 case GBPARSE_INT_NUMBER :
298 case GBPARSE_INT_STRING:
299 temp = StringMove(temp,now -> data.ptrvalue);
300 break;
301 case GBPARSE_INT_GT :
302 temp = StringMove(temp,">");
303 break;
304 case GBPARSE_INT_LT :
305 temp = StringMove(temp,"<");
306 break;
307 case GBPARSE_INT_COMMA :
308 temp = StringMove(temp,",");
309 break;
310 case GBPARSE_INT_ORDER :
311 temp = StringMove(temp,"order");
312 break;
313 case GBPARSE_INT_SINGLE_DOT :
314 temp = StringMove(temp,".");
315 break;
316 case GBPARSE_INT_GROUP :
317 temp = StringMove(temp,"group");
318 break;
319 case GBPARSE_INT_ONE_OF :
320 case GBPARSE_INT_ONE_OF_NUM:
321 temp = StringMove(temp,"one-of");
322 break;
323 case GBPARSE_INT_REPLACE :
324 temp = StringMove(temp,"replace");
325 break;
326 case GBPARSE_INT_UNKNOWN :
327 default:
328 break;
329 }
330 temp = StringMove(temp," ");
331 if ( now == current)
332 break;
333 }
334 }
335
336 return retval;
337 }
338
339 /*--------- Nlm_find_one_of_num()------------*/
340 /*
341
342 Consider these for locations:
343 misc_signal join(57..one-of(67,75),one-of(100,110)..200)
344 misc_signal join(57..one-of(67,75),one-of(100,110..120),200)
345 misc_signal join(57..one-of(67,75),one-of(100,110..115)..200)
346
347 misc_signal join(57..one-of(67,75),one-of(100,110),200)
348
349 In the first three, the one-of() is functioning as an alternative set
350 of numbers, in the last, as an alternative set of locations (even
351 though the locations are points).
352 [yes the one-of(100,110..115).. is illegal]
353
354 here is one more case:one-of(18,30)..470 so if the location
355 starts with a one-of, it also needs to be checked.
356
357 To deal with this, the GBPARSE_INT_ONE_OF token type will be changed
358 by the following function to GBPARSE_INT_ONE_OF_NUM, in the three cases.
359
360 note that this change is not necessary in this case:
361 join(100..200,300..one-of(400,500)), as after a ".." token,
362 it has to be a number.
363
364 */
365
366 static void
Nlm_find_one_of_num(ValNodePtr head_token)367 Nlm_find_one_of_num(ValNodePtr head_token)
368 {
369 ValNodePtr current, scanner;
370
371 current = head_token;
372 if (current -> choice == GBPARSE_INT_ONE_OF){
373 scanner= current -> next;
374 /*-------(is first token after ")" a ".."?----*/
375 for (;scanner!=NULL; scanner = scanner -> next){
376 if (scanner -> choice == GBPARSE_INT_RIGHT){
377 scanner = scanner -> next;
378 if (scanner != NULL){
379 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
380 /*---- this is it ! ! */
381 current -> choice = GBPARSE_INT_ONE_OF_NUM;
382 }
383 }
384 break;
385 }
386 }
387 }
388 for (current = head_token; current != NULL; current = current -> next){
389 if ( current -> choice == GBPARSE_INT_COMMA ||
390 current -> choice == GBPARSE_INT_LEFT ){
391 scanner= current -> next;
392 if ( scanner != NULL){
393 if (scanner -> choice == GBPARSE_INT_ONE_OF){
394 /*-------(is first token after ")" a ".."?----*/
395 for (;scanner!=NULL; scanner = scanner -> next){
396 if (scanner -> choice == GBPARSE_INT_RIGHT){
397 scanner = scanner -> next;
398 if (scanner != NULL){
399 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
400 /*---- this is it ! ! */
401 current -> next -> choice
402 = GBPARSE_INT_ONE_OF_NUM;
403 }
404 }
405 break;
406 }
407 }
408 }
409 }
410 }
411 }
412
413 }
414
415 /*---------- Nlm_gbparseint()-----*/
416
417 NLM_EXTERN SeqLocPtr
Nlm_gbparseint(CharPtr raw_intervals,Boolean PNTR keep_rawPt,Boolean PNTR sitesPt,int PNTR num_errsPt,SeqIdPtr seq_id)418 Nlm_gbparseint(CharPtr raw_intervals, Boolean PNTR keep_rawPt, Boolean PNTR sitesPt, int PNTR num_errsPt, SeqIdPtr seq_id)
419 {
420 SeqLocPtr retval = NULL;
421 ValNodePtr head_token, current_token;
422 int paren_count = 0;
423 Boolean go_again;
424
425 * keep_rawPt = FALSE;
426 * sitesPt = FALSE;
427
428 head_token = NULL;
429 (*num_errsPt) = gbparselex(raw_intervals, & head_token);
430
431 if (head_token == NULL) {
432 *num_errsPt = 1;
433 return NULL;
434 }
435 if ( ! (*num_errsPt)){
436 current_token = head_token;
437 Nlm_find_one_of_num(head_token);
438
439 do {
440 go_again= FALSE;
441 if (current_token)
442 switch ( current_token -> choice){
443 case GBPARSE_INT_JOIN : case GBPARSE_INT_ORDER :
444 case GBPARSE_INT_GROUP : case GBPARSE_INT_ONE_OF :
445 case GBPARSE_INT_COMPL:
446 retval = Nlm_gbloc(keep_rawPt, & paren_count, sitesPt, & current_token,
447 head_token, (num_errsPt), seq_id);
448 /* need to check that out of tokens here */
449 retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
450 head_token, retval, keep_rawPt, paren_count);
451 break;
452 case GBPARSE_INT_STRING:
453 Nlm_gbparse_error("string in loc",
454 head_token, current_token);
455 * keep_rawPt = TRUE; (* num_errsPt) ++;
456 /* no break on purpose */
457 case GBPARSE_INT_UNKNOWN :
458 default:
459 case GBPARSE_INT_RIGHT :
460 case GBPARSE_INT_DOT_DOT :
461 case GBPARSE_INT_COMMA :
462 case GBPARSE_INT_SINGLE_DOT :
463
464 Nlm_gbparse_error("illegal initial token",
465 head_token, current_token);
466 * keep_rawPt = TRUE; (* num_errsPt) ++;
467 current_token = current_token -> next;
468 break;
469
470 case GBPARSE_INT_ACCESION :
471 /*--- no warn, but strange ---*/
472 /*-- no break on purpose ---*/
473
474 case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
475 case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
476 case GBPARSE_INT_LEFT :
477
478 case GBPARSE_INT_ONE_OF_NUM:
479
480 retval = Nlm_gbint(keep_rawPt, & current_token,
481 head_token, (num_errsPt), seq_id);
482 /* need to check that out of tokens here */
483 retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
484 head_token, retval, keep_rawPt, paren_count);
485 break;
486
487 case GBPARSE_INT_REPLACE :
488 retval = Nlm_gbreplace(keep_rawPt, & paren_count, sitesPt, & current_token,
489 head_token, (num_errsPt), seq_id);
490 * keep_rawPt = TRUE;
491 /*---all errors handled within this function ---*/
492 break;
493 case GBPARSE_INT_SITES :
494 * sitesPt = TRUE;
495 go_again = TRUE;
496 current_token = current_token -> next;
497 break;
498 }
499 }while (go_again && current_token);
500 }else{
501 * keep_rawPt = TRUE;
502 }
503
504 if ( head_token)
505 ValNodeFreeData(head_token);
506
507 if ( (*num_errsPt)){
508 SeqLocFree(retval);
509 retval = NULL;
510 }
511 return retval;
512 }
513
514 /*---------- Nlm_gbparseint_ver()-----*/
515
Nlm_gbparseint_ver(CharPtr raw_intervals,Boolean PNTR keep_rawPt,Boolean PNTR sitesPt,int PNTR num_errsPt,SeqIdPtr seq_id,Boolean accver)516 NLM_EXTERN SeqLocPtr Nlm_gbparseint_ver(CharPtr raw_intervals,
517 Boolean PNTR keep_rawPt,
518 Boolean PNTR sitesPt,
519 int PNTR num_errsPt,
520 SeqIdPtr seq_id, Boolean accver)
521 {
522 SeqLocPtr retval = NULL;
523 ValNodePtr head_token, current_token;
524 int paren_count = 0;
525 Boolean go_again;
526
527 * keep_rawPt = FALSE;
528 * sitesPt = FALSE;
529
530 head_token = NULL;
531 (*num_errsPt) = Nlm_gbparselex_ver(raw_intervals, &head_token, accver);
532
533 if (head_token == NULL) {
534 *num_errsPt = 1;
535 return NULL;
536 }
537 if ( ! (*num_errsPt)){
538 current_token = head_token;
539 Nlm_find_one_of_num(head_token);
540
541 do {
542 go_again= FALSE;
543 if (current_token)
544 switch ( current_token -> choice){
545 case GBPARSE_INT_JOIN : case GBPARSE_INT_ORDER :
546 case GBPARSE_INT_GROUP : case GBPARSE_INT_ONE_OF :
547 case GBPARSE_INT_COMPL:
548 retval = Nlm_gbloc_ver(keep_rawPt, & paren_count, sitesPt, & current_token,
549 head_token, (num_errsPt), seq_id, accver);
550 /* need to check that out of tokens here */
551 retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
552 head_token, retval, keep_rawPt, paren_count);
553 break;
554 case GBPARSE_INT_STRING:
555 Nlm_gbparse_error("string in loc",
556 head_token, current_token);
557 * keep_rawPt = TRUE; (* num_errsPt) ++;
558 /* no break on purpose */
559 case GBPARSE_INT_UNKNOWN :
560 default:
561 case GBPARSE_INT_RIGHT :
562 case GBPARSE_INT_DOT_DOT :
563 case GBPARSE_INT_COMMA :
564 case GBPARSE_INT_SINGLE_DOT :
565
566 Nlm_gbparse_error("illegal initial token",
567 head_token, current_token);
568 * keep_rawPt = TRUE; (* num_errsPt) ++;
569 current_token = current_token -> next;
570 break;
571
572 case GBPARSE_INT_ACCESION :
573 /*--- no warn, but strange ---*/
574 /*-- no break on purpose ---*/
575
576 case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
577 case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
578 case GBPARSE_INT_LEFT :
579
580 case GBPARSE_INT_ONE_OF_NUM:
581
582 retval = Nlm_gbint_ver(keep_rawPt, & current_token,
583 head_token, (num_errsPt), seq_id, accver);
584 /* need to check that out of tokens here */
585 retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
586 head_token, retval, keep_rawPt, paren_count);
587 break;
588
589 case GBPARSE_INT_REPLACE :
590 retval = Nlm_gbreplace_ver(keep_rawPt, & paren_count, sitesPt, & current_token,
591 head_token, (num_errsPt), seq_id, accver);
592 * keep_rawPt = TRUE;
593 /*---all errors handled within this function ---*/
594 break;
595 case GBPARSE_INT_SITES :
596 * sitesPt = TRUE;
597 go_again = TRUE;
598 current_token = current_token -> next;
599 break;
600 }
601 }while (go_again && current_token);
602 }else{
603 * keep_rawPt = TRUE;
604 }
605
606 if ( head_token)
607 ValNodeFreeData(head_token);
608
609 if ( (*num_errsPt)){
610 SeqLocFree(retval);
611 retval = NULL;
612 }
613 return retval;
614 }
615
616 /*---------- Nlm_gbloc()-----*/
617
618 NLM_EXTERN SeqLocPtr
Nlm_gbloc(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)619 Nlm_gbloc(Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
620 {
621 SeqLocPtr retval =NULL;
622 Boolean add_nulls=FALSE;
623 ValNodePtr current_token = * currentPt;
624 Boolean did_complement= FALSE;
625 Boolean go_again ;
626
627 do {
628 go_again= FALSE;
629 switch ( current_token -> choice){
630 case GBPARSE_INT_COMPL :
631 *currentPt = (* currentPt) -> next;
632 if ( (*currentPt) == NULL){
633 Nlm_gbparse_error("unexpected end of usable tokens",
634 head_token, *currentPt);
635 * keep_rawPt = TRUE; (* num_errPt) ++;
636 goto FATAL;
637 }
638 if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
639 Nlm_gbparse_error("Missing \'(\'", /* paran match ) */
640 head_token, * currentPt);
641 * keep_rawPt = TRUE; (* num_errPt) ++;
642 goto FATAL;
643 }else{
644 (*parenPt) ++; *currentPt = (* currentPt) -> next;
645 if ( ! * currentPt){
646 Nlm_gbparse_error("illegal null contents",
647 head_token, *currentPt);
648 * keep_rawPt = TRUE; (* num_errPt) ++;
649 goto FATAL;
650 }else{
651 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
652 Nlm_gbparse_error("Premature \')\'",
653 head_token, *currentPt);
654 * keep_rawPt = TRUE; (* num_errPt) ++;
655 goto FATAL;
656 }else{
657 retval = Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt,
658 head_token, num_errPt,seq_id) ;
659 SeqLocRevCmp ( retval);
660 did_complement= TRUE;
661 if ( * currentPt){
662 if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
663 Nlm_gbparse_error("Missing \')\'",
664 head_token, *currentPt);
665 * keep_rawPt = TRUE; (* num_errPt) ++;
666 goto FATAL;
667 }else{
668 (*parenPt) --; *currentPt = (* currentPt) -> next;
669 }
670 }else{
671 Nlm_gbparse_error("Missing \')\'",
672 head_token, *currentPt);
673 * keep_rawPt = TRUE; (* num_errPt) ++;
674 goto FATAL;
675 }
676 }
677 }
678 }
679 break;
680 /* REAL LOCS */
681 case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
682 case GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
683 case GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
684 break;
685 case GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
686
687 /* ERROR */
688 case GBPARSE_INT_STRING:
689 Nlm_gbparse_error("string in loc",
690 head_token, current_token);
691 * keep_rawPt = TRUE; (* num_errPt) ++;
692 goto FATAL;
693 /*--- no break on purpose---*/
694 case GBPARSE_INT_UNKNOWN : default:
695 case GBPARSE_INT_RIGHT : case GBPARSE_INT_DOT_DOT:case GBPARSE_INT_COMMA :
696 case GBPARSE_INT_SINGLE_DOT :
697 Nlm_gbparse_error("illegal initial loc token",
698 head_token, *currentPt);
699 * keep_rawPt = TRUE; (* num_errPt) ++;
700 goto FATAL;
701
702 /* Interval, occurs on recursion */
703 case GBPARSE_INT_GAP:
704 Nlm_gbgap(currentPt, &retval, FALSE);
705 break;
706 case GBPARSE_INT_UNK_GAP:
707 Nlm_gbgap(currentPt, &retval, TRUE);
708 break;
709 case GBPARSE_INT_ACCESION :
710 case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
711 case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
712 case GBPARSE_INT_LEFT :
713
714 case GBPARSE_INT_ONE_OF_NUM:
715
716 retval = Nlm_gbint(keep_rawPt, currentPt,
717 head_token, num_errPt, seq_id);
718 break;
719
720 case GBPARSE_INT_REPLACE :
721 /*-------illegal at this level --*/
722 Nlm_gbparse_error("illegal replace",
723 head_token, *currentPt);
724 * keep_rawPt = TRUE; (* num_errPt) ++;
725 goto FATAL;
726 case GBPARSE_INT_SITES :
727 * sitesPt = TRUE;
728 go_again = TRUE;
729 (*currentPt) = (*currentPt) -> next;
730 break;
731 }
732 } while (go_again && *currentPt);
733
734 if ( ! (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
735 if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
736 && ! did_complement){
737 /*--------
738 * ONLY THE CHOICE has been set. the "join", etc. only has been noted
739 *----*/
740 *currentPt = (* currentPt) -> next;
741 if ( ! * currentPt){
742 Nlm_gbparse_error("unexpected end of interval tokens",
743 head_token, *currentPt);
744 * keep_rawPt = TRUE; (* num_errPt) ++;
745 goto FATAL;
746 }else{
747 if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
748 Nlm_gbparse_error("Missing \'(\'",
749 head_token, *currentPt); /* paran match ) */
750 * keep_rawPt = TRUE; (* num_errPt) ++;
751 goto FATAL;
752 }else{
753 (*parenPt) ++; *currentPt = (* currentPt) -> next;
754 if ( ! * currentPt){
755 Nlm_gbparse_error("illegal null contents",
756 head_token, *currentPt);
757 * keep_rawPt = TRUE; (* num_errPt) ++;
758 goto FATAL;
759 }else{
760 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
761 Nlm_gbparse_error("Premature \')\'" ,
762 head_token, *currentPt);
763 * keep_rawPt = TRUE; (* num_errPt) ++;
764 goto FATAL;
765 }else{
766
767 ValNodePtr last= NULL, next_loc = NULL;
768
769 while ( ! *num_errPt && * currentPt){
770 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
771 while ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
772 (*parenPt) --;
773 *currentPt = (* currentPt) -> next;
774 if ( ! *currentPt)
775 break;
776 }
777 break;
778 }
779 if ( ! * currentPt){
780 break;
781 }
782 next_loc = Nlm_gbloc(keep_rawPt, parenPt,sitesPt,
783 currentPt, head_token, num_errPt,
784 seq_id);
785 if( retval -> data.ptrvalue == NULL)
786 retval -> data.ptrvalue = next_loc;
787 if ( last)
788 last -> next = next_loc;
789 last = next_loc;
790 if ( ! * currentPt){
791 break;
792 }
793 if ( ! * currentPt){
794 break;
795 }
796 if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
797 break;
798 }
799 if ( (* currentPt) -> choice == GBPARSE_INT_COMMA){
800 *currentPt = (* currentPt) -> next;
801 if(add_nulls){
802 next_loc = ValNodeNew(last);
803 next_loc -> choice = SEQLOC_NULL;
804 last -> next = next_loc;
805 last = next_loc;
806 }
807 }else{
808 Nlm_gbparse_error("Illegal token after interval",
809 head_token, *currentPt);
810 * keep_rawPt = TRUE; (* num_errPt) ++;
811 goto FATAL;
812 }
813 }
814 }
815 }
816 if ( (*currentPt) == NULL){
817 Nlm_gbparse_error("unexpected end of usable tokens",
818 head_token, *currentPt);
819 * keep_rawPt = TRUE; (* num_errPt) ++;
820 goto FATAL;
821 }else{
822 if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
823 Nlm_gbparse_error("Missing \')\'" /* paran match ) */,
824 head_token, *currentPt);
825 * keep_rawPt = TRUE; (* num_errPt) ++;
826 goto FATAL;
827 }else{
828 (*parenPt) --; *currentPt = (* currentPt) -> next;
829 }
830 }
831 }
832 }
833 }
834
835 FATAL:
836 if ( (* num_errPt)){
837 if (retval){
838 SeqLocFree(retval);
839 retval =ValNodeNew(NULL);
840 retval -> choice = SEQLOC_WHOLE;
841 retval -> data.ptrvalue = SeqIdDup(seq_id);
842 }
843 }
844
845 return retval;
846 }
847
848 /*---------- Nlm_gbloc_ver()-----*/
849
Nlm_gbloc_ver(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)850 NLM_EXTERN SeqLocPtr Nlm_gbloc_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
851 Boolean PNTR sitesPt, ValNodePtr PNTR currentPt,
852 ValNodePtr head_token, int PNTR num_errPt,
853 SeqIdPtr seq_id, Boolean accver)
854 {
855 SeqLocPtr retval =NULL;
856 Boolean add_nulls=FALSE;
857 ValNodePtr current_token = * currentPt;
858 Boolean did_complement= FALSE;
859 Boolean go_again ;
860
861 do {
862 go_again= FALSE;
863 switch ( current_token -> choice){
864 case GBPARSE_INT_COMPL :
865 *currentPt = (* currentPt) -> next;
866 if ( (*currentPt) == NULL){
867 Nlm_gbparse_error("unexpected end of usable tokens",
868 head_token, *currentPt);
869 * keep_rawPt = TRUE; (* num_errPt) ++;
870 goto FATAL;
871 }
872 if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
873 Nlm_gbparse_error("Missing \'(\'", /* paran match ) */
874 head_token, * currentPt);
875 * keep_rawPt = TRUE; (* num_errPt) ++;
876 goto FATAL;
877 }else{
878 (*parenPt) ++; *currentPt = (* currentPt) -> next;
879 if ( ! * currentPt){
880 Nlm_gbparse_error("illegal null contents",
881 head_token, *currentPt);
882 * keep_rawPt = TRUE; (* num_errPt) ++;
883 goto FATAL;
884 }else{
885 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
886 Nlm_gbparse_error("Premature \')\'",
887 head_token, *currentPt);
888 * keep_rawPt = TRUE; (* num_errPt) ++;
889 goto FATAL;
890 }else{
891 retval = Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt,
892 head_token, num_errPt,seq_id, accver) ;
893 SeqLocRevCmp ( retval);
894 did_complement= TRUE;
895 if ( * currentPt){
896 if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
897 Nlm_gbparse_error("Missing \')\'",
898 head_token, *currentPt);
899 * keep_rawPt = TRUE; (* num_errPt) ++;
900 goto FATAL;
901 }else{
902 (*parenPt) --; *currentPt = (* currentPt) -> next;
903 }
904 }else{
905 Nlm_gbparse_error("Missing \')\'",
906 head_token, *currentPt);
907 * keep_rawPt = TRUE; (* num_errPt) ++;
908 goto FATAL;
909 }
910 }
911 }
912 }
913 break;
914 /* REAL LOCS */
915 case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
916 case GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
917 case GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
918 break;
919 case GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
920
921 /* ERROR */
922 case GBPARSE_INT_STRING:
923 Nlm_gbparse_error("string in loc",
924 head_token, current_token);
925 * keep_rawPt = TRUE; (* num_errPt) ++;
926 goto FATAL;
927 /*--- no break on purpose---*/
928 case GBPARSE_INT_UNKNOWN : default:
929 case GBPARSE_INT_RIGHT : case GBPARSE_INT_DOT_DOT:case GBPARSE_INT_COMMA :
930 case GBPARSE_INT_SINGLE_DOT :
931 Nlm_gbparse_error("illegal initial loc token",
932 head_token, *currentPt);
933 * keep_rawPt = TRUE; (* num_errPt) ++;
934 goto FATAL;
935
936 /* Interval, occurs on recursion */
937 case GBPARSE_INT_GAP:
938 Nlm_gbgap(currentPt, &retval, FALSE);
939 break;
940 case GBPARSE_INT_UNK_GAP:
941 Nlm_gbgap(currentPt, &retval, TRUE);
942 break;
943 case GBPARSE_INT_ACCESION :
944 case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
945 case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
946 case GBPARSE_INT_LEFT :
947
948 case GBPARSE_INT_ONE_OF_NUM:
949
950 retval = Nlm_gbint_ver(keep_rawPt, currentPt,
951 head_token, num_errPt, seq_id, accver);
952 break;
953
954 case GBPARSE_INT_REPLACE :
955 /*-------illegal at this level --*/
956 Nlm_gbparse_error("illegal replace",
957 head_token, *currentPt);
958 * keep_rawPt = TRUE; (* num_errPt) ++;
959 goto FATAL;
960 case GBPARSE_INT_SITES :
961 * sitesPt = TRUE;
962 go_again = TRUE;
963 (*currentPt) = (*currentPt) -> next;
964 break;
965 }
966 } while (go_again && *currentPt);
967
968 if ( ! (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
969 if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
970 && ! did_complement){
971 /*--------
972 * ONLY THE CHOICE has been set. the "join", etc. only has been noted
973 *----*/
974 *currentPt = (* currentPt) -> next;
975 if ( ! * currentPt){
976 Nlm_gbparse_error("unexpected end of interval tokens",
977 head_token, *currentPt);
978 * keep_rawPt = TRUE; (* num_errPt) ++;
979 goto FATAL;
980 }else{
981 if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
982 Nlm_gbparse_error("Missing \'(\'",
983 head_token, *currentPt); /* paran match ) */
984 * keep_rawPt = TRUE; (* num_errPt) ++;
985 goto FATAL;
986 }else{
987 (*parenPt) ++; *currentPt = (* currentPt) -> next;
988 if ( ! * currentPt){
989 Nlm_gbparse_error("illegal null contents",
990 head_token, *currentPt);
991 * keep_rawPt = TRUE; (* num_errPt) ++;
992 goto FATAL;
993 }else{
994 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
995 Nlm_gbparse_error("Premature \')\'" ,
996 head_token, *currentPt);
997 * keep_rawPt = TRUE; (* num_errPt) ++;
998 goto FATAL;
999 }else{
1000
1001 ValNodePtr last= NULL, next_loc = NULL;
1002
1003 while ( ! *num_errPt && * currentPt){
1004 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1005 while ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
1006 (*parenPt) --;
1007 *currentPt = (* currentPt) -> next;
1008 if ( ! *currentPt)
1009 break;
1010 }
1011 break;
1012 }
1013 if ( ! * currentPt){
1014 break;
1015 }
1016 next_loc = Nlm_gbloc_ver(keep_rawPt, parenPt,sitesPt,
1017 currentPt, head_token, num_errPt,
1018 seq_id, accver);
1019 if( retval -> data.ptrvalue == NULL)
1020 retval -> data.ptrvalue = next_loc;
1021 if ( last)
1022 last -> next = next_loc;
1023 last = next_loc;
1024 if ( ! * currentPt){
1025 break;
1026 }
1027 if ( ! * currentPt){
1028 break;
1029 }
1030 if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
1031 break;
1032 }
1033 if ( (* currentPt) -> choice == GBPARSE_INT_COMMA){
1034 *currentPt = (* currentPt) -> next;
1035 if(add_nulls){
1036 next_loc = ValNodeNew(last);
1037 next_loc -> choice = SEQLOC_NULL;
1038 last -> next = next_loc;
1039 last = next_loc;
1040 }
1041 }else{
1042 Nlm_gbparse_error("Illegal token after interval",
1043 head_token, *currentPt);
1044 * keep_rawPt = TRUE; (* num_errPt) ++;
1045 goto FATAL;
1046 }
1047 }
1048 }
1049 }
1050 if ( (*currentPt) == NULL){
1051 Nlm_gbparse_error("unexpected end of usable tokens",
1052 head_token, *currentPt);
1053 * keep_rawPt = TRUE; (* num_errPt) ++;
1054 goto FATAL;
1055 }else{
1056 if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
1057 Nlm_gbparse_error("Missing \')\'" /* paran match ) */,
1058 head_token, *currentPt);
1059 * keep_rawPt = TRUE; (* num_errPt) ++;
1060 goto FATAL;
1061 }else{
1062 (*parenPt) --; *currentPt = (* currentPt) -> next;
1063 }
1064 }
1065 }
1066 }
1067 }
1068
1069 FATAL:
1070 if ( (* num_errPt)){
1071 if (retval){
1072 SeqLocFree(retval);
1073 retval =ValNodeNew(NULL);
1074 retval -> choice = SEQLOC_WHOLE;
1075 retval -> data.ptrvalue = SeqIdDup(seq_id);
1076 }
1077 }
1078
1079 return retval;
1080 }
1081
1082 /**********************************************************/
/*
 * Nlm_gbgap()
 *
 * Handles a "gap( ... )" construct.  On entry *currentPt is the gap
 * keyword token; the tokens that follow must be '(' and then either ')'
 * or a number followed by ')'.
 *
 *   gap()   - a node is obtained with ValNodeNew(*retval), marked
 *             SEQLOC_NULL and stored in *retval.
 *   gap(N)  - GapToSeqLocEx(N, unknown) builds the location; it is linked
 *             after *retval (when *retval is not NULL) and *retval is
 *             moved onto it.
 *
 * On success *currentPt is advanced to the token after the closing ')'.
 * If the token sequence does not match, or GapToSeqLocEx() returns NULL,
 * the function returns without changing *currentPt or *retval.
 */
void Nlm_gbgap(ValNodePtr PNTR currentPt, ValNodePtr PNTR retval,
               Boolean unknown)
{
    ValNodePtr open_tok;
    ValNodePtr arg_tok;
    ValNodePtr close_tok;
    SeqLocPtr  gap_loc;
    int        to_skip;

    open_tok = (*currentPt)->next;
    if (open_tok == NULL || open_tok->choice != GBPARSE_INT_LEFT)
        return;

    arg_tok = open_tok->next;
    if (arg_tok == NULL)
        return;

    if (arg_tok->choice == GBPARSE_INT_RIGHT) {
        /* "gap()": keyword, '(' and ')' are consumed below */
        *retval = ValNodeNew(*retval);
        (*retval)->choice = SEQLOC_NULL;
        to_skip = 3;
    } else if (arg_tok->choice == GBPARSE_INT_NUMBER) {
        close_tok = arg_tok->next;
        if (close_tok == NULL || close_tok->choice != GBPARSE_INT_RIGHT)
            return;

        gap_loc = GapToSeqLocEx(atoi((CharPtr) arg_tok->data.ptrvalue),
                                unknown);
        if (gap_loc == NULL)
            return;

        if (*retval != NULL)
            (*retval)->next = gap_loc;
        *retval = gap_loc;

        /* "gap(N)": keyword, '(', number and ')' are consumed below */
        to_skip = 4;
    } else {
        return;
    }

    while (to_skip-- > 0)
        *currentPt = (*currentPt)->next;
}
1130
1131 /*--------------- Nlm_gbint ()--------------------*/
1132
1133 NLM_EXTERN SeqLocPtr /* sometimes returns points */
1134
Nlm_gbint(Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)1135 Nlm_gbint(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1136 {
1137 SeqLocPtr retnode = ValNodeNew(NULL);
1138 SeqIntPtr retint = SeqIntNew();
1139 TextSeqIdPtr tp;
1140 IntFuzzPtr fuzz=NULL;
1141 SeqIdPtr idp = NULL;
1142 Boolean took_choice=FALSE;
1143
1144 retnode -> choice = SEQLOC_INT;
1145
1146 if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1147 idp = ValNodeNew(NULL);
1148 if (seq_id){
1149 if (
1150 seq_id -> choice == SEQID_GENBANK
1151 || seq_id -> choice == SEQID_EMBL
1152 || seq_id -> choice == SEQID_DDBJ
1153 ){
1154 idp -> choice = seq_id -> choice;
1155 took_choice = TRUE;
1156 }
1157 }
1158 if (! took_choice){
1159 idp -> choice = SEQID_GENBANK;
1160 }
1161 tp = TextSeqIdNew();
1162 idp -> data.ptrvalue = tp;
1163 tp -> accession = StringSave ( (* currentPt) ->data.ptrvalue);
1164 *currentPt = (* currentPt) -> next;
1165 if ( ! *currentPt ){
1166 Nlm_gbparse_error("Nothing after accession",
1167 head_token, *currentPt);
1168 * keep_rawPt = TRUE; (* num_errPt) ++;
1169
1170 SeqIdFree(idp);
1171 idp = NULL;
1172
1173 goto FATAL;
1174 }
1175 }else{
1176 idp = SeqIdDup (seq_id);
1177 }
1178 if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1179 fuzz = IntFuzzNew();
1180 fuzz -> choice = 4;
1181 fuzz ->a = 2;
1182 *currentPt = (* currentPt) -> next;
1183 if ( ! *currentPt ){
1184 Nlm_gbparse_error("Nothing after \'<\'",
1185 head_token, *currentPt);
1186 * keep_rawPt = TRUE; (* num_errPt) ++;
1187 goto FATAL;
1188 }
1189 }
1190 if ( ! (* num_errPt))
1191 switch ( (*currentPt ) -> choice){
1192 case GBPARSE_INT_ACCESION :
1193 if ( idp){
1194 Nlm_gbparse_error("duplicate accessions",
1195 head_token, *currentPt);
1196 * keep_rawPt = TRUE; (* num_errPt) ++;
1197 goto FATAL;
1198 }
1199 break;
1200 case GBPARSE_INT_CARET :
1201 Nlm_gbparse_error("caret (^) before number" ,
1202 head_token, *currentPt);
1203 * keep_rawPt = TRUE; (* num_errPt) ++;
1204 goto FATAL;
1205 case GBPARSE_INT_LT :
1206 if ( idp){
1207 Nlm_gbparse_error("duplicate \'<\'",
1208 head_token, *currentPt);
1209 * keep_rawPt = TRUE; (* num_errPt) ++;
1210 goto FATAL;
1211 }
1212 break;
1213 case GBPARSE_INT_GT :
1214 case GBPARSE_INT_NUMBER :
1215 case GBPARSE_INT_LEFT :
1216
1217 case GBPARSE_INT_ONE_OF_NUM:
1218
1219 retint -> if_from = fuzz;
1220 retint -> id = idp;
1221 retnode -> data.ptrvalue = retint;
1222 Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1223 keep_rawPt, currentPt, head_token,
1224 num_errPt,TAKE_FIRST);
1225 Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1226 if ( ! (* num_errPt) ){
1227 if ( * currentPt){
1228 Boolean in_caret = FALSE;
1229 switch ( (*currentPt ) -> choice){
1230 SeqPntPtr point;
1231
1232 default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1233 case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1234 case GBPARSE_INT_ACCESION:
1235 Nlm_gbparse_error("problem with 2nd number",
1236 head_token, *currentPt);;
1237 * keep_rawPt = TRUE; (* num_errPt) ++;
1238 goto FATAL;
1239 case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1240 /*--------------but have a point, not an interval----*/
1241 Nlm_gbpintpnt(retnode, & retint);
1242 break;
1243 case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1244 Nlm_gbparse_error("Missing \'..\'",
1245 head_token, *currentPt);;
1246 * keep_rawPt = TRUE; (* num_errPt) ++;
1247 goto FATAL;
1248 case GBPARSE_INT_CARET:
1249 if (retint -> if_from){
1250 Nlm_gbparse_error("\'<\' then \'^\'",
1251 head_token, *currentPt);
1252 * keep_rawPt = TRUE; (* num_errPt) ++;
1253 goto FATAL;
1254 }
1255 retint -> if_from = IntFuzzNew();
1256 retint -> if_from -> choice = 4;
1257 retint -> if_from ->a = 4;
1258 retint -> if_to = IntFuzzNew();
1259 retint -> if_to -> choice = 4;
1260 retint -> if_to ->a = 4;
1261 in_caret = TRUE;
1262 /*---no break on purpose ---*/
1263 case GBPARSE_INT_DOT_DOT:
1264 *currentPt = (* currentPt) -> next;
1265 if ( (*currentPt) == NULL){
1266 Nlm_gbparse_error("unexpected end of usable tokens",
1267 head_token, *currentPt);
1268 * keep_rawPt = TRUE; (* num_errPt) ++;
1269 goto FATAL;
1270 }
1271 /*--no break on purpose here ---*/
1272 case GBPARSE_INT_NUMBER:
1273 case GBPARSE_INT_LEFT:
1274
1275 case GBPARSE_INT_ONE_OF_NUM: /* unlikely, but ok */
1276
1277 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1278 if (retint -> if_from){
1279 Nlm_gbparse_error("\'^\' then \'>\'",
1280 head_token, *currentPt);
1281 * keep_rawPt = TRUE; (* num_errPt) ++;
1282 goto FATAL;
1283 }
1284 }
1285 Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1286 keep_rawPt, currentPt, head_token,
1287 num_errPt, TAKE_SECOND);
1288 Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1289 /*----------
1290 * The caret location implies a place (point) between two location.
1291 * This is not exactly captured by the ASN.1, but pretty close
1292 *-------*/
1293 if (in_caret){
1294 Int4 to = retint -> to;
1295
1296 point = Nlm_gbpintpnt(retnode, & retint);
1297 if ( point -> point +1 == to){
1298 point -> point = to; /* was essentailly correct */
1299 }else{
1300 point -> fuzz -> choice = 2; /* range */
1301 point -> fuzz -> a = to; /* max */
1302 point -> fuzz ->b = point -> point;
1303 }
1304 }
1305 if (retint != NULL)
1306 if (retint -> from == retint -> to &&
1307 ! retint -> if_from &&
1308 ! retint -> if_to){
1309 /*-------if interval really a point, make is so ----*/
1310 Nlm_gbpintpnt(retnode, & retint);
1311 }
1312 } /* end switch */
1313 }else{
1314 Nlm_gbpintpnt(retnode, & retint);
1315 }
1316 }else{
1317 goto FATAL;
1318 }
1319 break;
1320 default:
1321 Nlm_gbparse_error("No number when expected",
1322 head_token, *currentPt);
1323 * keep_rawPt = TRUE; (* num_errPt) ++;
1324 goto FATAL;
1325
1326 }
1327
1328
1329 RETURN:
1330 return retnode;
1331
1332 FATAL:
1333 if (retint && (* num_errPt)){
1334 SeqIntFree(retint);
1335 retint = NULL;
1336 }
1337 ValNodeFree(retnode);
1338 retnode = NULL;
1339 goto RETURN;
1340 }
1341
1342 /*--------------- Nlm_gbint_ver ()--------------------*/
1343
1344 NLM_EXTERN SeqLocPtr /* sometimes returns points */
1345
Nlm_gbint_ver(Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)1346 Nlm_gbint_ver(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt,
1347 ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id,
1348 Boolean accver)
1349 {
1350 SeqLocPtr retnode = ValNodeNew(NULL);
1351 SeqIntPtr retint = SeqIntNew();
1352 TextSeqIdPtr tp;
1353 IntFuzzPtr fuzz=NULL;
1354 SeqIdPtr idp = NULL;
1355 Boolean took_choice=FALSE;
1356 CharPtr p;
1357
1358 retnode -> choice = SEQLOC_INT;
1359
1360 if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1361 idp = ValNodeNew(NULL);
1362 if (seq_id){
1363 if (
1364 seq_id -> choice == SEQID_GENBANK
1365 || seq_id -> choice == SEQID_EMBL
1366 || seq_id -> choice == SEQID_DDBJ
1367 ){
1368 idp -> choice = seq_id -> choice;
1369 took_choice = TRUE;
1370 }
1371 }
1372 if (! took_choice){
1373 idp -> choice = SEQID_GENBANK;
1374 }
1375 tp = TextSeqIdNew();
1376 idp -> data.ptrvalue = tp;
1377 if(accver == FALSE)
1378 {
1379 tp->accession = StringSave((*currentPt)->data.ptrvalue);
1380 }
1381 else
1382 {
1383 p = StringChr((*currentPt)->data.ptrvalue, '.');
1384 if(p == NULL)
1385 {
1386 tp->accession = StringSave((*currentPt)->data.ptrvalue);
1387 Nlm_gbparse_error("Missing accession's version",
1388 head_token, *currentPt);
1389 }
1390 else
1391 {
1392 *p = '\0';
1393 tp->accession = StringSave((*currentPt)->data.ptrvalue);
1394 tp->version = atoi(p + 1);
1395 *p = '.';
1396 }
1397 }
1398 *currentPt = (* currentPt) -> next;
1399 if ( ! *currentPt ){
1400 Nlm_gbparse_error("Nothing after accession",
1401 head_token, *currentPt);
1402 * keep_rawPt = TRUE; (* num_errPt) ++;
1403
1404 SeqIdFree(idp);
1405 idp = NULL;
1406
1407 goto FATAL;
1408 }
1409 }else{
1410 idp = SeqIdDup (seq_id);
1411 }
1412 if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1413 fuzz = IntFuzzNew();
1414 fuzz -> choice = 4;
1415 fuzz ->a = 2;
1416 *currentPt = (* currentPt) -> next;
1417 if ( ! *currentPt ){
1418 Nlm_gbparse_error("Nothing after \'<\'",
1419 head_token, *currentPt);
1420 * keep_rawPt = TRUE; (* num_errPt) ++;
1421 goto FATAL;
1422 }
1423 }
1424 if ( ! (* num_errPt))
1425 switch ( (*currentPt ) -> choice){
1426 case GBPARSE_INT_ACCESION :
1427 if ( idp){
1428 Nlm_gbparse_error("duplicate accessions",
1429 head_token, *currentPt);
1430 * keep_rawPt = TRUE; (* num_errPt) ++;
1431 goto FATAL;
1432 }
1433 break;
1434 case GBPARSE_INT_CARET :
1435 Nlm_gbparse_error("caret (^) before number" ,
1436 head_token, *currentPt);
1437 * keep_rawPt = TRUE; (* num_errPt) ++;
1438 goto FATAL;
1439 case GBPARSE_INT_LT :
1440 if ( idp){
1441 Nlm_gbparse_error("duplicate \'<\'",
1442 head_token, *currentPt);
1443 * keep_rawPt = TRUE; (* num_errPt) ++;
1444 goto FATAL;
1445 }
1446 break;
1447 case GBPARSE_INT_GT :
1448 case GBPARSE_INT_NUMBER :
1449 case GBPARSE_INT_LEFT :
1450
1451 case GBPARSE_INT_ONE_OF_NUM:
1452
1453 retint -> if_from = fuzz;
1454 retint -> id = idp;
1455 retnode -> data.ptrvalue = retint;
1456 Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1457 keep_rawPt, currentPt, head_token,
1458 num_errPt,TAKE_FIRST);
1459 Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1460 if ( ! (* num_errPt) ){
1461 if ( * currentPt){
1462 Boolean in_caret = FALSE;
1463 switch ( (*currentPt ) -> choice){
1464 SeqPntPtr point;
1465
1466 default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1467 case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1468 case GBPARSE_INT_ACCESION:
1469 Nlm_gbparse_error("problem with 2nd number",
1470 head_token, *currentPt);;
1471 * keep_rawPt = TRUE; (* num_errPt) ++;
1472 goto FATAL;
1473 case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1474 /*--------------but have a point, not an interval----*/
1475 Nlm_gbpintpnt(retnode, & retint);
1476 break;
1477 case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1478 Nlm_gbparse_error("Missing \'..\'",
1479 head_token, *currentPt);;
1480 * keep_rawPt = TRUE; (* num_errPt) ++;
1481 goto FATAL;
1482 case GBPARSE_INT_CARET:
1483 if (retint -> if_from){
1484 Nlm_gbparse_error("\'<\' then \'^\'",
1485 head_token, *currentPt);
1486 * keep_rawPt = TRUE; (* num_errPt) ++;
1487 goto FATAL;
1488 }
1489 retint -> if_from = IntFuzzNew();
1490 retint -> if_from -> choice = 4;
1491 retint -> if_from ->a = 4;
1492 retint -> if_to = IntFuzzNew();
1493 retint -> if_to -> choice = 4;
1494 retint -> if_to ->a = 4;
1495 in_caret = TRUE;
1496 /*---no break on purpose ---*/
1497 case GBPARSE_INT_DOT_DOT:
1498 *currentPt = (* currentPt) -> next;
1499 if ( (*currentPt) == NULL){
1500 Nlm_gbparse_error("unexpected end of usable tokens",
1501 head_token, *currentPt);
1502 * keep_rawPt = TRUE; (* num_errPt) ++;
1503 goto FATAL;
1504 }
1505 /*--no break on purpose here ---*/
1506 case GBPARSE_INT_NUMBER:
1507 case GBPARSE_INT_LEFT:
1508
1509 case GBPARSE_INT_ONE_OF_NUM: /* unlikely, but ok */
1510
1511 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1512 if (retint -> if_from){
1513 Nlm_gbparse_error("\'^\' then \'>\'",
1514 head_token, *currentPt);
1515 * keep_rawPt = TRUE; (* num_errPt) ++;
1516 goto FATAL;
1517 }
1518 }
1519 Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1520 keep_rawPt, currentPt, head_token,
1521 num_errPt, TAKE_SECOND);
1522 Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1523 /*----------
1524 * The caret location implies a place (point) between two location.
1525 * This is not exactly captured by the ASN.1, but pretty close
1526 *-------*/
1527 if (in_caret){
1528 Int4 to = retint -> to;
1529
1530 point = Nlm_gbpintpnt(retnode, & retint);
1531 if ( point -> point +1 == to){
1532 point -> point = to; /* was essentailly correct */
1533 }else{
1534 point -> fuzz -> choice = 2; /* range */
1535 point -> fuzz -> a = to; /* max */
1536 point -> fuzz ->b = point -> point;
1537 }
1538 }
1539 if (retint != NULL)
1540 if (retint -> from == retint -> to &&
1541 ! retint -> if_from &&
1542 ! retint -> if_to){
1543 /*-------if interval really a point, make is so ----*/
1544 Nlm_gbpintpnt(retnode, & retint);
1545 }
1546 } /* end switch */
1547 }else{
1548 Nlm_gbpintpnt(retnode, & retint);
1549 }
1550 }else{
1551 goto FATAL;
1552 }
1553 break;
1554 default:
1555 Nlm_gbparse_error("No number when expected",
1556 head_token, *currentPt);
1557 * keep_rawPt = TRUE; (* num_errPt) ++;
1558 goto FATAL;
1559
1560 }
1561
1562
1563 RETURN:
1564 return retnode;
1565
1566 FATAL:
1567 if (retint && (* num_errPt)){
1568 SeqIntFree(retint);
1569 retint = NULL;
1570 }
1571 ValNodeFree(retnode);
1572 retnode = NULL;
1573 goto RETURN;
1574 }
1575
1576 /*------------------- Nlm_gbpintpnt()-----------*/
1577
1578 NLM_EXTERN SeqPntPtr
Nlm_gbpintpnt(SeqLocPtr retnode,SeqIntPtr PNTR retintPt)1579 Nlm_gbpintpnt(SeqLocPtr retnode, SeqIntPtr PNTR retintPt)
1580 {
1581 SeqPntPtr point;
1582 point = SeqPntNew();
1583 point -> point = (*retintPt) -> from;
1584 point -> id = (*retintPt) -> id;
1585 (*retintPt) -> id = NULL;
1586 point -> fuzz = (*retintPt) -> if_from;
1587 (*retintPt) -> if_from = NULL;
1588 SeqIntFree((*retintPt));
1589 (*retintPt) = NULL;
1590 retnode -> choice = SEQLOC_PNT;
1591 retnode -> data.ptrvalue = point;
1592 return point;
1593 }
1594
1595 /*----- Nlm_gbload_number() -----*/
1596
1597 NLM_EXTERN void
Nlm_gbload_number(Int4 PNTR numPt,IntFuzzPtr PNTR fuzzPt,Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,int take_which)1598 Nlm_gbload_number (Int4 PNTR numPt, IntFuzzPtr PNTR fuzzPt, Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, int take_which)
1599 {
1600 int num_found=0;
1601 int fuzz_err =0;
1602 Boolean strange_sin_dot = FALSE;
1603
1604 if ((*currentPt ) -> choice == GBPARSE_INT_CARET){
1605 Nlm_gbparse_error("duplicate carets",
1606 head_token, *currentPt);
1607 (*keep_rawPt) = TRUE; (*num_errPt) ++;
1608 *currentPt = (* currentPt) -> next;
1609 fuzz_err = 1;
1610 }else if ((*currentPt ) -> choice == GBPARSE_INT_GT ||
1611 (*currentPt ) -> choice == GBPARSE_INT_LT){
1612 if ( ! * fuzzPt){
1613 * fuzzPt = IntFuzzNew();
1614 }
1615 (* fuzzPt) -> choice = 4;
1616 if ((*currentPt ) -> choice == GBPARSE_INT_GT ){
1617 (* fuzzPt) -> a = 1; /* 'a' serves as "lim" for choice 4 */
1618 }else{
1619 (* fuzzPt) -> a = 2;
1620 }
1621 *currentPt = (* currentPt) -> next;
1622 }else if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1623 strange_sin_dot = TRUE;
1624 *currentPt = (* currentPt) -> next;
1625 if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1626 if ( ! * fuzzPt){
1627 * fuzzPt = IntFuzzNew();
1628 }
1629 (* fuzzPt) -> b = atoi((*currentPt ) -> data.ptrvalue)-1;
1630 (* fuzzPt) -> choice = 2;
1631 if ( take_which == TAKE_FIRST ){
1632 * numPt = (* fuzzPt) -> b;
1633 }
1634 *currentPt = (* currentPt) -> next;
1635 num_found=1;
1636 }else{
1637 fuzz_err =1;
1638 }
1639 if ((*currentPt ) -> choice != GBPARSE_INT_SINGLE_DOT ){
1640 fuzz_err =1;
1641 }else{
1642 *currentPt = (* currentPt) -> next;
1643 if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1644 (* fuzzPt) -> a = atoi((*currentPt ) -> data.ptrvalue)-1;
1645 if ( take_which == TAKE_SECOND ){
1646 * numPt = (* fuzzPt) -> a;
1647 }
1648 *currentPt = (* currentPt) -> next;
1649 }else{
1650 fuzz_err =1;
1651 }
1652 if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1653 *currentPt = (* currentPt) -> next;
1654 }else{
1655 fuzz_err =1;
1656 }
1657 }
1658
1659 }else if ((*currentPt ) -> choice != GBPARSE_INT_NUMBER) {
1660 /* this prevents endless cycling, unconditionally */
1661 if ((*currentPt ) -> choice != GBPARSE_INT_ONE_OF
1662 && (*currentPt ) -> choice != GBPARSE_INT_ONE_OF_NUM)
1663 *currentPt = (* currentPt) -> next;
1664 num_found = -1;
1665 }
1666
1667 if ( ! strange_sin_dot){
1668 if ( ! * currentPt){
1669 Nlm_gbparse_error("unexpected end of interval tokens",
1670 head_token, *currentPt);
1671 * keep_rawPt = TRUE; (* num_errPt) ++;
1672 }else{
1673 if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1674 * numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1675 *currentPt = (* currentPt) -> next;
1676 num_found=1;
1677 }
1678 }
1679 }
1680
1681 if ( fuzz_err){
1682 Nlm_gbparse_error("Incorrect uncertainty",
1683 head_token, *currentPt);
1684 (*keep_rawPt) = TRUE; (*num_errPt) ++;
1685 }
1686 if ( num_found != 1){
1687 (*keep_rawPt) = TRUE;
1688 /****************
1689 *
1690 * 10..one-of(13,15) type syntax here
1691 *
1692 ***************/
1693 if ((*currentPt ) -> choice == GBPARSE_INT_ONE_OF
1694 || (*currentPt ) -> choice == GBPARSE_INT_ONE_OF_NUM){
1695 Boolean one_of_ok = TRUE;
1696 Boolean at_end_one_of = FALSE;
1697
1698 *currentPt = (* currentPt) -> next;
1699 if ((*currentPt ) -> choice != GBPARSE_INT_LEFT){
1700 one_of_ok = FALSE;
1701 }else{
1702 *currentPt = (* currentPt) -> next;
1703 }
1704 if (one_of_ok && (*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1705 * numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1706 *currentPt = (* currentPt) -> next;
1707 }else{
1708 one_of_ok = FALSE;
1709 }
1710 while (one_of_ok && ! at_end_one_of && *currentPt != NULL){
1711 switch ( (*currentPt ) -> choice){
1712 default:
1713 one_of_ok = FALSE;
1714 break;
1715 case GBPARSE_INT_COMMA:
1716 case GBPARSE_INT_NUMBER:
1717 *currentPt = (* currentPt) -> next;
1718 break;
1719 case GBPARSE_INT_RIGHT:
1720 *currentPt = (* currentPt) -> next;
1721 at_end_one_of = TRUE;
1722 break;
1723 }
1724 }
1725 if ( ! one_of_ok && ! at_end_one_of){
1726 while (! at_end_one_of && *currentPt != NULL){
1727 if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1728 at_end_one_of = TRUE;
1729 }
1730 *currentPt = (* currentPt) -> next;
1731 }
1732 }
1733
1734 if ( ! one_of_ok){
1735 Nlm_gbparse_error("bad one-of() syntax as number",
1736 head_token, *currentPt);
1737 (*num_errPt) ++;
1738 }
1739 }else{
1740 Nlm_gbparse_error("Number not found when expected",
1741 head_token, *currentPt);
1742 (*num_errPt) ++;
1743 }
1744 }
1745 }
1746
1747 /*----------------- Nlm_gbparse_better_be_done()-------------*/
1748 NLM_EXTERN SeqLocPtr
Nlm_gbparse_better_be_done(int PNTR num_errsPt,ValNodePtr current_token,ValNodePtr head_token,SeqLocPtr ret_so_far,Boolean PNTR keep_rawPt,int paren_count)1749 Nlm_gbparse_better_be_done(int PNTR num_errsPt, ValNodePtr current_token, ValNodePtr head_token, SeqLocPtr ret_so_far, Boolean PNTR keep_rawPt, int paren_count)
1750 {
1751 SeqLocPtr retval = ret_so_far;
1752
1753 if ( current_token)
1754 while (current_token -> choice == GBPARSE_INT_RIGHT){
1755 paren_count --;
1756 current_token = current_token -> next;
1757 if ( ! current_token){
1758 if ( paren_count){
1759 char par_msg[40];
1760 sprintf(par_msg, "mismatched parentheses (%d)", paren_count);
1761 Nlm_gbparse_error(par_msg,
1762 head_token, current_token);
1763 *keep_rawPt = TRUE;
1764 (*num_errsPt) ++;
1765 }
1766 break;
1767 }
1768 }
1769 if ( paren_count){
1770 Nlm_gbparse_error("text after last legal right parenthesis",
1771 head_token, current_token);
1772 *keep_rawPt = TRUE;
1773 (*num_errsPt) ++;
1774 }
1775
1776 if (current_token){
1777 Nlm_gbparse_error("text after end",
1778 head_token, current_token);
1779 *keep_rawPt = TRUE;
1780 (*num_errsPt) ++;
1781 }
1782 return retval;
1783 }
1784
1785 /*-------- Nlm_gbreplace() --------*/
1786
1787 NLM_EXTERN SeqLocPtr
Nlm_gbreplace(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)1788 Nlm_gbreplace (Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1789 {
1790 SeqLocPtr retval = NULL;
1791
1792 * keep_rawPt = TRUE;
1793 *currentPt = (* currentPt) -> next;
1794
1795 if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1796 *currentPt = (* currentPt) -> next;
1797 retval = Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1798 num_errPt,seq_id);
1799 if ( ! * currentPt){
1800 Nlm_gbparse_error("unexpected end of interval tokens",
1801 head_token, *currentPt);
1802 * keep_rawPt = TRUE; (* num_errPt) ++;
1803 }else{
1804
1805 if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1806 Nlm_gbparse_error("Missing comma after first location in replace",
1807 head_token, *currentPt);
1808 (* num_errPt) ++;
1809 }
1810 }
1811 }else{
1812 Nlm_gbparse_error("Missing \'(\'" /* paran match ) */
1813 , head_token, *currentPt);
1814 (* num_errPt) ++;
1815 }
1816 return retval;
1817 }
1818
1819
1820
1821 /*-------- Nlm_gbreplace_ver() --------*/
1822
Nlm_gbreplace_ver(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)1823 NLM_EXTERN SeqLocPtr Nlm_gbreplace_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
1824 Boolean PNTR sitesPt,
1825 ValNodePtr PNTR currentPt,
1826 ValNodePtr head_token, int PNTR num_errPt,
1827 SeqIdPtr seq_id, Boolean accver)
1828 {
1829 SeqLocPtr retval = NULL;
1830
1831 * keep_rawPt = TRUE;
1832 *currentPt = (* currentPt) -> next;
1833
1834 if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1835 *currentPt = (* currentPt) -> next;
1836 retval = Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1837 num_errPt,seq_id, accver);
1838 if ( ! * currentPt){
1839 Nlm_gbparse_error("unexpected end of interval tokens",
1840 head_token, *currentPt);
1841 * keep_rawPt = TRUE; (* num_errPt) ++;
1842 }else{
1843
1844 if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1845 Nlm_gbparse_error("Missing comma after first location in replace",
1846 head_token, *currentPt);
1847 (* num_errPt) ++;
1848 }
1849 }
1850 }else{
1851 Nlm_gbparse_error("Missing \'(\'" /* paran match ) */
1852 , head_token, *currentPt);
1853 (* num_errPt) ++;
1854 }
1855 return retval;
1856 }
1857
1858 /**********************************************************/
/*
 * Nlm_gbparse_accprefix()
 *
 * Classifies the leading part of an accession string and returns a small
 * integer (presumably the prefix length - confirm against the caller):
 *
 *   0  acc is NULL or empty.
 *   3 or 7
 *      two letters, '_', then zero or four more letters
 *      (the count includes the underscore).
 *   7  the leading letters are followed by two digits and 'S'
 *      (presumably the "4+2+S" WGS scaffold form mentioned in the file's
 *      revision log; the number of leading letters is not checked).
 *   1, 2 or 4
 *      that many leading letters, none of the above.
 *   1  everything else.
 */
static int Nlm_gbparse_accprefix(CharPtr acc)
{
    CharPtr scan;
    int     letters;
    int     total;

    if (acc == NULL || *acc == '\0')
        return 0;

    /* count the leading alphabetic characters */
    scan = acc;
    while (IS_ALPHA(*scan) != 0)
        scan++;
    letters = scan - acc;

    if (*scan == '_') {
        if (letters != 2)
            return 1;

        /* "XX_" form: more letters may follow the underscore */
        scan++;
        while (IS_ALPHA(*scan) != 0)
            scan++;
        total = scan - acc;

        return (total == 3 || total == 7) ? total : 1;
    }

    /* the digit tests also stop at the terminating NUL */
    if (scan[0] >= '0' && scan[0] <= '9' &&
        scan[1] >= '0' && scan[1] <= '9' && scan[2] == 'S')
        return 7;

    if (letters == 1 || letters == 2 || letters == 4)
        return letters;

    return 1;
}
1890
1891
/* Character temporarily overwritten by Nlm_lex_error_MACRO. */
char Saved_ch;

/*
 * Nlm_lex_error_MACRO(msg)
 *
 * Reports a lexer error through Nlm_gbparse_error().  Expects the
 * expanding scope to provide "current_col" (pointer into the line being
 * lexed) and "forerrmacro" (a ValNode).  While the error is reported the
 * line is cut off right after the character current_col points at; the
 * overwritten character is put back afterwards.
 *
 * The expansion is a plain statement sequence and call sites invoke it
 * without a trailing semicolon, so it must only be used inside a braced
 * block.
 */
#define Nlm_lex_error_MACRO(msg)\
    if (current_col == NULL || *current_col == '\0'){\
        Saved_ch = '\0';\
    }else{\
        Saved_ch = current_col[1];\
        current_col[1] = '\0';\
    }\
    Nlm_gbparse_error(msg, &forerrmacro, &forerrmacro);\
    if (Saved_ch != '\0')\
        current_col[1] = Saved_ch;
1904
1905 /*------------- gbparselex()-----------------------*/
1906
1907 NLM_EXTERN int
Nlm_gbparselex(CharPtr linein,ValNodePtr PNTR lexed)1908 Nlm_gbparselex(CharPtr linein, ValNodePtr PNTR lexed)
1909 {
1910 CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
1911 int dex;
1912 int retval = 0, len;
1913 ValNodePtr current_token = NULL, last_token = NULL;
1914 Boolean skip_new_token=FALSE;
1915 Boolean die_now=FALSE;
1916 ValNode forerrmacro;
1917
1918
1919 if (linein == NULL) return retval;
1920
1921 forerrmacro.choice =GBPARSE_INT_ACCESION ;
1922
1923 if (*linein){
1924 len = StringLen(linein);
1925 line_use = MemNew(len + 1);
1926 StringCpy(line_use, linein);
1927 if ( * lexed){
1928 Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex")
1929 ValNodeFree( * lexed);
1930 * lexed = NULL;
1931 }
1932 current_col = line_use ;
1933 forerrmacro.data.ptrvalue = line_use;
1934 /*---------
1935 * Clear terminal white space
1936 *---------*/
1937 points_at_term_null = line_use + len;
1938 spare = points_at_term_null - 1;
1939 while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
1940 *spare-- = '\0';
1941 points_at_term_null --;
1942 }
1943
1944
1945 while (current_col < points_at_term_null && ! die_now) {
1946 if ( ! skip_new_token){
1947 last_token = current_token;
1948 current_token = ValNodeNew(current_token);
1949 if ( ! * lexed)
1950 * lexed = current_token;
1951 }
1952 switch ( *current_col){
1953
1954 case '\"':
1955 skip_new_token = FALSE;
1956 current_token -> choice = GBPARSE_INT_STRING;
1957 for (spare = current_col +1; spare < points_at_term_null;
1958 spare ++) {
1959 if ( *spare == '\"'){
1960 break;
1961 }
1962 }
1963 if (spare >= points_at_term_null){
1964 Nlm_lex_error_MACRO( "unterminated string")
1965 retval ++;
1966 }else{
1967 len = spare-current_col + 1;
1968 current_token -> data.ptrvalue =
1969 MemNew(len +2);
1970 StringNCpy(current_token -> data.ptrvalue,
1971 current_col,len);
1972 current_col += len;
1973 }
1974 break;
1975 /*------
1976 * NUMBER
1977 *------*/
1978 case '0': case '1': case '2': case '3': case '4':
1979 case '5': case '6': case '7': case '8': case '9':
1980 skip_new_token = FALSE;
1981 current_token -> choice = GBPARSE_INT_NUMBER;
1982 for (dex=0, spare = current_col; isdigit((int) *spare); spare ++){
1983 dex ++ ;
1984 }
1985 current_token -> data.ptrvalue = MemNew(dex+1);
1986 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
1987 current_col += dex -1;
1988 break;
1989 /*------
1990 * JOIN
1991 *------*/
1992 case 'j':
1993 skip_new_token = FALSE;
1994 current_token -> choice = GBPARSE_INT_JOIN;
1995 if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
1996 Nlm_lex_error_MACRO( "\"join\" misspelled")
1997 retval += 10;
1998 for(;*current_col && *current_col != '('; current_col++)
1999 ; /* vi match ) empty body*/
2000 current_col -- ; /* back up 'cause ++ follows */
2001 }else{
2002 current_col += 3;
2003 }
2004 break;
2005
2006 /*------
2007 * ORDER and ONE-OF
2008 *------*/
2009 case 'o':
2010 skip_new_token = FALSE;
2011 if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
2012 if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
2013 Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
2014 retval ++;
2015 for(;*current_col && *current_col != '('; current_col++)
2016 ; /* vi match ) empty body*/
2017 current_col -- ; /* back up 'cause ++ follows */
2018 }else{
2019 current_token -> choice = GBPARSE_INT_ONE_OF ;
2020 current_col += 5;
2021 }
2022 }else{
2023 current_token -> choice = GBPARSE_INT_ORDER;
2024 current_col += 4;
2025 }
2026 break;
2027
2028 /*------
2029 * REPLACE
2030 *------*/
2031 case 'r' :
2032 skip_new_token = FALSE;
2033 current_token -> choice = GBPARSE_INT_REPLACE ;
2034 if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
2035 Nlm_lex_error_MACRO( "\"replace\" misspelled")
2036 retval ++;
2037 for(;*current_col && *current_col != '('; current_col++)
2038 ; /* vi match ) empty body*/
2039 current_col -- ; /* back up 'cause ++ follows */
2040 }else{
2041 current_col += 6;
2042 }
2043 break;
2044
2045 /*------
2046 * GAP or GROUP or GI
2047 *------*/
2048 case 'g':
2049 skip_new_token = FALSE;
2050 if(StringNCmp(current_col, "gap", 3) == 0 &&
2051 (current_col[3] == '(' ||
2052 current_col[3] == ' ' ||
2053 current_col[3] == '\t' ||
2054 current_col[3] == '\0'))
2055 {
2056 current_token->choice = GBPARSE_INT_GAP;
2057 current_token->data.ptrvalue = MemNew(4);
2058 StringCpy(current_token->data.ptrvalue, "gap");
2059 if(StringNICmp(current_col + 3, "(unk", 4) == 0)
2060 {
2061 current_token->choice = GBPARSE_INT_UNK_GAP;
2062 last_token = current_token;
2063 current_token = ValNodeNew(current_token);
2064 current_token->choice = GBPARSE_INT_LEFT;
2065 current_col += 4;
2066 }
2067 current_col += 2;
2068 break;
2069 }
2070 if(StringNCmp(current_col, "gi|", 3) == 0) {
2071 current_token->choice = GBPARSE_INT_ACCESION;
2072 current_col += 3;
2073 for (; IS_DIGIT(*current_col); current_col++) ;
2074 break;
2075 }
2076 current_token -> choice = GBPARSE_INT_GROUP;
2077 if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2078 Nlm_lex_error_MACRO("\"group\" misspelled")
2079 retval ++;
2080 for(;*current_col && *current_col != '('; current_col++)
2081 ; /* vi match ) empty body*/
2082 current_col -- ; /* back up 'cause ++ follows */
2083 }else{
2084 current_col += 4;
2085 }
2086 break;
2087
2088 /*------
2089 * COMPLEMENT
2090 *------*/
2091 case 'c':
2092 skip_new_token = FALSE;
2093 current_token -> choice = GBPARSE_INT_COMPL;
2094 if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2095 Nlm_lex_error_MACRO("\"complement\" misspelled")
2096 retval += 10;
2097 for(;*current_col && *current_col != '('; current_col++)
2098 ; /* vi match ) empty body*/
2099 current_col -- ; /* back up 'cause ++ follows */
2100 }else{
2101 current_col += 9;
2102 }
2103 break;
2104
2105 /*-------
2106 * internal bases ignored
2107 *---------*/
2108 case 'b':
2109 if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2110 goto ACCESSION;
2111 }else{
2112 skip_new_token = TRUE;
2113 current_col += 4;
2114 }
2115 break;
2116
2117 /*------
2118 * ()^.,<> (bases (sites
2119 *------*/
2120 case '(':
2121 if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2122 skip_new_token = FALSE;
2123 current_token -> choice = GBPARSE_INT_JOIN;
2124 current_col += 4;
2125 if (*current_col != '\0')
2126 if ( * (current_col +1) == 's')
2127 current_col ++;
2128 last_token = current_token;
2129 current_token = ValNodeNew(current_token);
2130 current_token -> choice = GBPARSE_INT_LEFT;
2131 }else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2132 skip_new_token = FALSE;
2133 current_col += 5;
2134 if (*current_col != '\0')
2135 if ( * (current_col +1) == ')'){
2136 current_col ++;
2137 current_token -> choice = GBPARSE_INT_SITES;
2138 }else{
2139 current_token -> choice = GBPARSE_INT_SITES;
2140 last_token = current_token;
2141 current_token = ValNodeNew(current_token);
2142 current_token -> choice = GBPARSE_INT_JOIN;
2143 last_token = current_token;
2144 current_token = ValNodeNew(current_token);
2145 current_token -> choice = GBPARSE_INT_LEFT;
2146 if (*current_col != '\0'){
2147 if ( * (current_col +1) == ';'){
2148 current_col ++;
2149 }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2150 current_col += 2;
2151 }
2152 }
2153 }
2154 }else{
2155 skip_new_token = FALSE;
2156 current_token -> choice = GBPARSE_INT_LEFT;
2157 }
2158 break;
2159
2160 case ')':
2161 skip_new_token = FALSE;
2162 current_token -> choice = GBPARSE_INT_RIGHT;
2163
2164 break;
2165
2166 case '^':
2167 skip_new_token = FALSE;
2168 current_token -> choice = GBPARSE_INT_CARET;
2169 break;
2170
2171 case '-':
2172 skip_new_token = FALSE;
2173 current_token -> choice = GBPARSE_INT_DOT_DOT ;
2174 break;
2175 case '.':
2176 skip_new_token = FALSE;
2177 if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2178 current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2179 }else{
2180 current_token -> choice = GBPARSE_INT_DOT_DOT;
2181 current_col ++ ;
2182 }
2183 break;
2184
2185 case '>':
2186 skip_new_token = FALSE;
2187 current_token -> choice = GBPARSE_INT_GT;
2188 break;
2189
2190 case '<':
2191 skip_new_token = FALSE;
2192 current_token -> choice = GBPARSE_INT_LT;
2193
2194 break;
2195
2196 case ';':
2197 case ',':
2198 skip_new_token = FALSE;
2199 current_token -> choice = GBPARSE_INT_COMMA;
2200 break;
2201
2202 case ' ': case '\t': case '\n': case '\r': case '~':
2203 skip_new_token = TRUE;
2204 break;
2205
2206 case 't' :
2207 if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2208 goto ACCESSION;
2209 }else{
2210 skip_new_token = FALSE;
2211 current_token -> choice = GBPARSE_INT_DOT_DOT;
2212 current_col ++ ;
2213 break;
2214 }
2215
2216 case 's' :
2217 if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2218 goto ACCESSION;
2219 }else{
2220 skip_new_token = FALSE;
2221 current_token -> choice = GBPARSE_INT_SITES;
2222 current_col += 3 ;
2223 if (*current_col != '\0')
2224 if ( * (current_col +1) == 's')
2225 current_col ++;
2226 if (*current_col != '\0'){
2227 if ( * (current_col +1) == ';'){
2228 current_col ++;
2229 }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2230 current_col += 2;
2231 }
2232 }
2233 break;
2234 }
2235
2236
2237 ACCESSION:
2238 default:
2239 /*-------
2240 * all GenBank accessions start with a capital letter
2241 * and then have numbers
2242 ------*/
2243 /* new accessions start with 2 capital letters !! 1997 */
2244 /* new accessions have .version !! 2/15/1999 */
2245 skip_new_token = FALSE;
2246 current_token -> choice = GBPARSE_INT_ACCESION;
2247 dex = Nlm_gbparse_accprefix(current_col);
2248 spare = current_col + dex;
2249 for (; isdigit((int) *spare); spare ++){
2250 dex ++ ;
2251 }
2252 if (*spare == '.') {
2253 dex ++ ;
2254 for (spare++; isdigit((int) *spare); spare ++){
2255 dex ++ ;
2256 }
2257 }
2258 if (*spare != ':'){
2259 Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2260 retval += 10;
2261 current_col --;
2262 }
2263 current_token -> data.ptrvalue = MemNew(dex+1);
2264 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2265 current_col += dex ;
2266
2267
2268 }
2269 /*--move to past last "good" character---*/
2270 current_col ++;
2271 }
2272 if ( ! * lexed && current_token){
2273 * lexed = current_token;
2274 }
2275 if (skip_new_token && current_token) {
2276 /*---------
2277 * last node points to a null (blank or white space token)
2278 *-----------*/
2279 if (last_token){
2280 last_token -> next = NULL;
2281 }else{
2282 * lexed = NULL;
2283 }
2284 ValNodeFree(current_token);
2285 }
2286 }
2287 if ( line_use)
2288 MemFree(line_use);
2289
2290 return retval;
2291 }
2292
2293
2294 /*------------- Nlm_gbparselex_ver() -----------------------*/
2295
2296 NLM_EXTERN int
Nlm_gbparselex_ver(CharPtr linein,ValNodePtr PNTR lexed,Boolean accver)2297 Nlm_gbparselex_ver(CharPtr linein, ValNodePtr PNTR lexed, Boolean accver)
2298 {
2299 CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
2300 int dex;
2301 int retval = 0, len;
2302 ValNodePtr current_token = NULL, last_token = NULL;
2303 Boolean skip_new_token=FALSE;
2304 Boolean die_now=FALSE;
2305 ValNode forerrmacro;
2306
2307 forerrmacro.choice =GBPARSE_INT_ACCESION ;
2308
2309 if (*linein ){
2310 len = StringLen(linein);
2311 line_use = MemNew(len + 1);
2312 StringCpy(line_use, linein);
2313 if ( * lexed){
2314 Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex_ver")
2315 ValNodeFree( * lexed);
2316 * lexed = NULL;
2317 }
2318 current_col = line_use ;
2319 forerrmacro.data.ptrvalue = line_use;
2320 /*---------
2321 * Clear terminal white space
2322 *---------*/
2323 points_at_term_null = line_use + len;
2324 spare = points_at_term_null - 1;
2325 while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
2326 *spare-- = '\0';
2327 points_at_term_null --;
2328 }
2329
2330
2331 while (current_col < points_at_term_null && ! die_now) {
2332 if ( ! skip_new_token){
2333 last_token = current_token;
2334 current_token = ValNodeNew(current_token);
2335 if ( ! * lexed)
2336 * lexed = current_token;
2337 }
2338 switch ( *current_col){
2339
2340 case '\"':
2341 skip_new_token = FALSE;
2342 current_token -> choice = GBPARSE_INT_STRING;
2343 for (spare = current_col +1; spare < points_at_term_null;
2344 spare ++) {
2345 if ( *spare == '\"'){
2346 break;
2347 }
2348 }
2349 if (spare >= points_at_term_null){
2350 Nlm_lex_error_MACRO( "unterminated string")
2351 retval ++;
2352 }else{
2353 len = spare-current_col + 1;
2354 current_token -> data.ptrvalue =
2355 MemNew(len +2);
2356 StringNCpy(current_token -> data.ptrvalue,
2357 current_col,len);
2358 current_col += len;
2359 }
2360 break;
2361 /*------
2362 * NUMBER
2363 *------*/
2364 case '0': case '1': case '2': case '3': case '4':
2365 case '5': case '6': case '7': case '8': case '9':
2366 skip_new_token = FALSE;
2367 current_token -> choice = GBPARSE_INT_NUMBER;
2368 for (dex=0, spare = current_col; isdigit((int) *spare); spare ++){
2369 dex ++ ;
2370 }
2371 current_token -> data.ptrvalue = MemNew(dex+1);
2372 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2373 current_col += dex -1;
2374 break;
2375 /*------
2376 * JOIN
2377 *------*/
2378 case 'j':
2379 skip_new_token = FALSE;
2380 current_token -> choice = GBPARSE_INT_JOIN;
2381 if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
2382 Nlm_lex_error_MACRO( "\"join\" misspelled")
2383 retval += 10;
2384 for(;*current_col && *current_col != '('; current_col++)
2385 ; /* vi match ) empty body*/
2386 current_col -- ; /* back up 'cause ++ follows */
2387 }else{
2388 current_col += 3;
2389 }
2390 break;
2391
2392 /*------
2393 * ORDER and ONE-OF
2394 *------*/
2395 case 'o':
2396 skip_new_token = FALSE;
2397 if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
2398 if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
2399 Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
2400 retval ++;
2401 for(;*current_col && *current_col != '('; current_col++)
2402 ; /* vi match ) empty body*/
2403 current_col -- ; /* back up 'cause ++ follows */
2404 }else{
2405 current_token -> choice = GBPARSE_INT_ONE_OF ;
2406 current_col += 5;
2407 }
2408 }else{
2409 current_token -> choice = GBPARSE_INT_ORDER;
2410 current_col += 4;
2411 }
2412 break;
2413
2414 /*------
2415 * REPLACE
2416 *------*/
2417 case 'r' :
2418 skip_new_token = FALSE;
2419 current_token -> choice = GBPARSE_INT_REPLACE ;
2420 if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
2421 Nlm_lex_error_MACRO( "\"replace\" misspelled")
2422 retval ++;
2423 for(;*current_col && *current_col != '('; current_col++)
2424 ; /* vi match ) empty body*/
2425 current_col -- ; /* back up 'cause ++ follows */
2426 }else{
2427 current_col += 6;
2428 }
2429 break;
2430
2431 /*------
2432 * GAP or GROUP or GI
2433 *------*/
2434 case 'g':
2435 skip_new_token = FALSE;
2436 if(StringNCmp(current_col, "gap", 3) == 0 &&
2437 (current_col[3] == '(' ||
2438 current_col[3] == ' ' ||
2439 current_col[3] == '\t' ||
2440 current_col[3] == '\0'))
2441 {
2442 current_token->choice = GBPARSE_INT_GAP;
2443 current_token->data.ptrvalue = MemNew(4);
2444 StringCpy(current_token->data.ptrvalue, "gap");
2445 if(StringNICmp(current_col + 3, "(unk", 4) == 0)
2446 {
2447 current_token->choice = GBPARSE_INT_UNK_GAP;
2448 last_token = current_token;
2449 current_token = ValNodeNew(current_token);
2450 current_token->choice = GBPARSE_INT_LEFT;
2451 current_col += 4;
2452 }
2453 current_col += 2;
2454 break;
2455 }
2456 if(StringNCmp(current_col, "gi|", 3) == 0) {
2457 current_token->choice = GBPARSE_INT_ACCESION;
2458 current_col += 3;
2459 for (; IS_DIGIT(*current_col); current_col++) ;
2460 break;
2461 }
2462 current_token -> choice = GBPARSE_INT_GROUP;
2463 if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2464 Nlm_lex_error_MACRO("\"group\" misspelled")
2465 retval ++;
2466 for(;*current_col && *current_col != '('; current_col++)
2467 ; /* vi match ) empty body*/
2468 current_col -- ; /* back up 'cause ++ follows */
2469 }else{
2470 current_col += 4;
2471 }
2472 break;
2473
2474 /*------
2475 * COMPLEMENT
2476 *------*/
2477 case 'c':
2478 skip_new_token = FALSE;
2479 current_token -> choice = GBPARSE_INT_COMPL;
2480 if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2481 Nlm_lex_error_MACRO("\"complement\" misspelled")
2482 retval += 10;
2483 for(;*current_col && *current_col != '('; current_col++)
2484 ; /* vi match ) empty body*/
2485 current_col -- ; /* back up 'cause ++ follows */
2486 }else{
2487 current_col += 9;
2488 }
2489 break;
2490
2491 /*-------
2492 * internal bases ignored
2493 *---------*/
2494 case 'b':
2495 if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2496 goto ACCESSION;
2497 }else{
2498 skip_new_token = TRUE;
2499 current_col += 4;
2500 }
2501 break;
2502
2503 /*------
2504 * ()^.,<> (bases (sites
2505 *------*/
2506 case '(':
2507 if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2508 skip_new_token = FALSE;
2509 current_token -> choice = GBPARSE_INT_JOIN;
2510 current_col += 4;
2511 if (*current_col != '\0')
2512 if ( * (current_col +1) == 's')
2513 current_col ++;
2514 last_token = current_token;
2515 current_token = ValNodeNew(current_token);
2516 current_token -> choice = GBPARSE_INT_LEFT;
2517 }else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2518 skip_new_token = FALSE;
2519 current_col += 5;
2520 if (*current_col != '\0')
2521 if ( * (current_col +1) == ')'){
2522 current_col ++;
2523 current_token -> choice = GBPARSE_INT_SITES;
2524 }else{
2525 current_token -> choice = GBPARSE_INT_SITES;
2526 last_token = current_token;
2527 current_token = ValNodeNew(current_token);
2528 current_token -> choice = GBPARSE_INT_JOIN;
2529 last_token = current_token;
2530 current_token = ValNodeNew(current_token);
2531 current_token -> choice = GBPARSE_INT_LEFT;
2532 if (*current_col != '\0'){
2533 if ( * (current_col +1) == ';'){
2534 current_col ++;
2535 }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2536 current_col += 2;
2537 }
2538 }
2539 }
2540 }else{
2541 skip_new_token = FALSE;
2542 current_token -> choice = GBPARSE_INT_LEFT;
2543 }
2544 break;
2545
2546 case ')':
2547 skip_new_token = FALSE;
2548 current_token -> choice = GBPARSE_INT_RIGHT;
2549
2550 break;
2551
2552 case '^':
2553 skip_new_token = FALSE;
2554 current_token -> choice = GBPARSE_INT_CARET;
2555 break;
2556
2557 case '-':
2558 skip_new_token = FALSE;
2559 current_token -> choice = GBPARSE_INT_DOT_DOT ;
2560 break;
2561 case '.':
2562 skip_new_token = FALSE;
2563 if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2564 current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2565 }else{
2566 current_token -> choice = GBPARSE_INT_DOT_DOT;
2567 current_col ++ ;
2568 }
2569 break;
2570
2571 case '>':
2572 skip_new_token = FALSE;
2573 current_token -> choice = GBPARSE_INT_GT;
2574 break;
2575
2576 case '<':
2577 skip_new_token = FALSE;
2578 current_token -> choice = GBPARSE_INT_LT;
2579
2580 break;
2581
2582 case ';':
2583 case ',':
2584 skip_new_token = FALSE;
2585 current_token -> choice = GBPARSE_INT_COMMA;
2586 break;
2587
2588 case ' ': case '\t': case '\n': case '\r': case '~':
2589 skip_new_token = TRUE;
2590 break;
2591
2592 case 't' :
2593 if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2594 goto ACCESSION;
2595 }else{
2596 skip_new_token = FALSE;
2597 current_token -> choice = GBPARSE_INT_DOT_DOT;
2598 current_col ++ ;
2599 break;
2600 }
2601
2602 case 's' :
2603 if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2604 goto ACCESSION;
2605 }else{
2606 skip_new_token = FALSE;
2607 current_token -> choice = GBPARSE_INT_SITES;
2608 current_col += 3 ;
2609 if (*current_col != '\0')
2610 if ( * (current_col +1) == 's')
2611 current_col ++;
2612 if (*current_col != '\0'){
2613 if ( * (current_col +1) == ';'){
2614 current_col ++;
2615 }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2616 current_col += 2;
2617 }
2618 }
2619 break;
2620 }
2621
2622
2623 ACCESSION:
2624 default:
2625 /*-------
2626 * all GenBank accessions start with a capital letter
2627 * and then have numbers
2628 ------*/
2629 /* new accessions start with 2 capital letters !! 1997 */
2630 /* new accessions have .version !! 2/15/1999 */
2631 skip_new_token = FALSE;
2632 current_token -> choice = GBPARSE_INT_ACCESION;
2633 dex = Nlm_gbparse_accprefix(current_col);
2634 spare = current_col + dex;
2635 for (; isdigit((int) *spare); spare ++){
2636 dex ++ ;
2637 }
2638 if (accver != FALSE && *spare == '.') {
2639 dex ++ ;
2640 for (spare++; isdigit((int) *spare); spare ++){
2641 dex ++ ;
2642 }
2643 }
2644 if (*spare != ':'){
2645 Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2646 retval += 10;
2647 current_col --;
2648 }
2649 current_token -> data.ptrvalue = MemNew(dex+1);
2650 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2651 current_col += dex ;
2652
2653
2654 }
2655 /*--move to past last "good" character---*/
2656 current_col ++;
2657 }
2658 if ( ! * lexed && current_token){
2659 * lexed = current_token;
2660 }
2661 if (skip_new_token && current_token) {
2662 /*---------
2663 * last node points to a null (blank or white space token)
2664 *-----------*/
2665 if (last_token){
2666 last_token -> next = NULL;
2667 }else{
2668 * lexed = NULL;
2669 }
2670 ValNodeFree(current_token);
2671 }
2672 }
2673 if ( line_use)
2674 MemFree(line_use);
2675
2676 return retval;
2677 }
2678
2679
2680 /*---- non_white()----*/
2681
2682 NLM_EXTERN CharPtr
Nlm_non_white(CharPtr ch)2683 Nlm_non_white(CharPtr ch)
2684 {
2685 while (isspace((int) *++ch))if (! *ch) break;
2686 ;
2687 return ch;
2688 }
2689
2690 /*------ gbparse_lexfree()-------*/
2691
2692 NLM_EXTERN ValNodePtr
Nlm_gbparse_lexfree(ValNodePtr anp)2693 Nlm_gbparse_lexfree(ValNodePtr anp)
2694 {
2695 ValNodePtr next;
2696
2697 while (anp != NULL)
2698 {
2699 next = anp->next;
2700 if ( anp -> choice == GBPARSE_INT_NUMBER ||
2701 anp -> choice == GBPARSE_INT_ACCESION){
2702 MemFree(anp->data.ptrvalue);
2703 }
2704 MemFree(anp);
2705 anp = next;
2706 }
2707
2708 return NULL;
2709 }
2710