1 /* gbparint.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  gbparint.c
27 *
28 * Author:  Karl Sirotkin
29 *
30 * $Log: gbparint.c,v $
31 * Revision 6.10  2014/08/01 17:14:01  bazhin
32 * Added support for new format (4+2+S+{6|7|8}) WGS scaffolds.
33 *
34 * Revision 6.9  2009/10/02 19:46:00  kans
35 * address clang static analyzer warnings
36 *
37 * Revision 6.8  2004/07/22 16:08:35  bazhin
38 * Changes to parse gaps of unknown lengths (like "gap(unk100)")
39 * within location strings.
40 *
41 * Revision 6.7  2004/03/03 17:32:19  kans
42 * Nlm_gbparselex checks against NULL input
43 *
44 * Revision 6.6  2003/12/05 16:42:11  bazhin
45 * Nlm_gbparselex() and Nlm_gbparselex_ver() functions now can handle
46 * RefSeq and WGS accessions.
47 *
48 * Revision 6.5  2001/06/07 17:00:54  tatiana
49 * added gi option in Nlm_gbparselex()
50 *
51 * Revision 6.4  2000/03/20 23:38:39  aleksey
52 * Finally submitted the changes which have been made by serge bazhin
53 * and been kept in my local directory.
54 *
55 * These changes allow to establish user callback functions
56 * in 'Asn2ffJobPtr' structure which are called within
57 * 'SeqEntryToFlatAjp' function call.
58 * The new members are:
59 * user_data       - pointer to a user context for passing data
60 * ajp_count_index - user defined function
61 * ajp_print_data  - user defined function
62 * ajp_print_index - user defined function
63 *
64 * Revision 6.3  1999/04/06 19:42:55  bazhin
65 * Changes, related to flat2asn's ACCESSION.VERSION parsing.
66 *
67 * Revision 6.2  1999/04/02 21:15:07  tatiana
68 * accession.version added
69 *
70 * Revision 6.1  1997/10/24 21:28:39  bazhin
71 * Is able to distinguish and process "gap(...)" tokens inside
72 * of location entries. Made for CONTIG line join contents.
73 *
74 * Revision 6.0  1997/08/25 18:06:05  madden
75 * Revision changed to 6.0
76 *
77 * Revision 5.3  1997/06/19 18:38:01  vakatov
78 * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
79 *
80 * Revision 5.2  1997/02/06 00:16:14  tatiana
81 * dealing with 2+6 accession
82 *
83  * Revision 5.1  1997/01/27  19:16:17  tatiana
84  * accept two-letter prefix in accession number
85  *
86  * Revision 5.0  1996/05/28  13:23:23  ostell
87  * Set to revision 5.0
88  *
89  * Revision 4.2  1996/05/21  21:12:05  tatiana
90  * bullet proof in gbparseint()
91  *
92  * Revision 4.1  1995/07/31  19:02:10  tatiana
93  * fix seq_id->choice
94  *
95  * Revision 1.8  1995/05/15  21:46:05  ostell
96  * added Log line
97  *
98 *
99 *
100 */
101 
102 #include "parsegb.h"
103 #include "gbparlex.h"
104 #include "errdefn.h"
105 #include <sequtil.h>
106 #include <edutil.h>
107 
108 #define TAKE_FIRST 1
109 #define TAKE_SECOND 2
110 
111 void Nlm_gbgap PROTO((ValNodePtr PNTR currentPt, ValNodePtr PNTR retval,
112                       Boolean unknown));
113 
114 /*--------- do_Nlm_gbparse_error () ---------------*/
115 
116 NLM_EXTERN void
do_Nlm_gbparse_error(CharPtr msg,CharPtr details)117 do_Nlm_gbparse_error (CharPtr msg, CharPtr details)
118 {
119     Int4 len = StringLen(msg) +7;
120     CharPtr errmsg, temp;
121 
122     len += StringLen(details);
123     temp = errmsg= MemNew((size_t)len);
124     temp = StringMove(temp, msg);
125     temp = StringMove(temp, " at ");
126     temp = StringMove(temp, details);
127 
128     ErrPostStr(SEV_ERROR, ERR_FEATURE_LocationParsing, errmsg);
129 
130     MemFree(errmsg);
131 }
132 #define MAKE_THREAD_SAFE
133 #ifndef MAKE_THREAD_SAFE
134 static Nlm_gbparse_errfunc Err_func = do_Nlm_gbparse_error;
135 static Nlm_gbparse_rangefunc Range_func = NULL;
136 static Pointer Nlm_gbparse_range_data = NULL;
137 #define MACRO_THREAD_SAVE_STATIC
138 
139 #else
140 
141 #include <ncbithr.h>
142 
143 static TNlmTls Err_func_tls=NULL;
144 static TNlmTls Range_func_tls=NULL;
145 static TNlmTls Nlm_gbparse_range_data_tls=NULL;
146 
147 #define MACRO_THREAD_SAVE_STATIC \
148         Nlm_gbparse_errfunc Err_func = NULL; \
149         Nlm_gbparse_rangefunc Range_func = NULL; \
150         Pointer Nlm_gbparse_range_data = NULL; \
151         if(Err_func_tls) NlmTlsGetValue(Err_func_tls,(VoidPtr PNTR)&Err_func); \
152     if(!Err_func) Err_func = do_Nlm_gbparse_error; \
153         if(Range_func_tls) NlmTlsGetValue(Range_func_tls,(VoidPtr PNTR)&Range_func); \
154         if(Nlm_gbparse_range_data_tls) NlmTlsGetValue(Nlm_gbparse_range_data_tls,(VoidPtr PNTR)&Nlm_gbparse_range_data);
155 #endif
156 
157 /*------------------ Nlm_gbcheck_range()-------------*/
158 static void
Nlm_gbcheck_range(Int4 num,SeqIdPtr idp,Boolean PNTR keep_rawPt,int PNTR num_errsPt,ValNodePtr head,ValNodePtr current)159 Nlm_gbcheck_range(Int4 num, SeqIdPtr idp, Boolean PNTR keep_rawPt, int PNTR num_errsPt, ValNodePtr head, ValNodePtr current)
160 {
161     Int4 len;
162     MACRO_THREAD_SAVE_STATIC;
163     if (Range_func != NULL){
164         len = (*Range_func)(Nlm_gbparse_range_data, idp);
165         if (len > 0)
166         if (num <0 || num >= len){
167             Nlm_gbparse_error("range error",  head, current);
168             * keep_rawPt = TRUE;
169             (*num_errsPt) ++;
170         }
171     }
172 }
173 
174 /*----------- Nlm_install_gbparse_error_handler ()-------------*/
175 
176 NLM_EXTERN void
Nlm_install_gbparse_error_handler(Nlm_gbparse_errfunc new_func)177 Nlm_install_gbparse_error_handler(Nlm_gbparse_errfunc new_func)
178 {
179 #ifdef MAKE_THREAD_SAFE
180     NlmTlsSetValue(&Err_func_tls, (VoidPtr PNTR) new_func, NULL);
181 #else
182     Err_func = new_func;
183 #endif
184 }
185 
186 /*----------- Nlm_install_gbparse_range_func ()-------------*/
187 
188 NLM_EXTERN void
Nlm_install_gbparse_range_func(Pointer data,Nlm_gbparse_rangefunc new_func)189 Nlm_install_gbparse_range_func(Pointer data, Nlm_gbparse_rangefunc new_func)
190 {
191 #ifdef MAKE_THREAD_SAFE
192     NlmTlsSetValue(&Range_func_tls, (VoidPtr PNTR) new_func,NULL);
193     NlmTlsSetValue(&Nlm_gbparse_range_data_tls,data,NULL);
194 #else
195     Range_func = new_func;
196     Nlm_gbparse_range_data = data;
197 #endif
198 
199 }
200 
201 /*--------- Nlm_gbparse_error()-----------*/
202 
203 NLM_EXTERN void
Nlm_gbparse_error(CharPtr front,ValNodePtr head,ValNodePtr current)204 Nlm_gbparse_error(CharPtr front, ValNodePtr head, ValNodePtr current)
205 {
206     CharPtr details;
207 
208     MACRO_THREAD_SAVE_STATIC;
209 
210     details = Nlm_gbparse_point (head, current);
211     Err_func (front,details);
212     MemFree(details);
213 }
214 
215 /*------ Nlm_gbparse_point ()----*/
216 
217 NLM_EXTERN CharPtr
Nlm_gbparse_point(ValNodePtr head,ValNodePtr current)218 Nlm_gbparse_point (ValNodePtr head, ValNodePtr current)
219 {
220     CharPtr temp, retval = NULL;
221     int len = 0;
222     ValNodePtr now;
223 
224     for ( now = head; now ; now = now -> next){
225         switch ( now-> choice){
226             case GBPARSE_INT_JOIN :
227                 len += 4;
228                 break;
229             case GBPARSE_INT_COMPL :
230                 len += 10;
231                 break;
232             case GBPARSE_INT_LEFT :
233             case GBPARSE_INT_RIGHT :
234             case GBPARSE_INT_CARET :
235             case GBPARSE_INT_GT :
236             case GBPARSE_INT_LT :
237             case GBPARSE_INT_COMMA :
238             case GBPARSE_INT_SINGLE_DOT :
239                 len ++;
240                 break;
241             case GBPARSE_INT_DOT_DOT :
242                 len += 2;
243                 break;
244             case GBPARSE_INT_ACCESION :
245             case GBPARSE_INT_NUMBER :
246                 len += StringLen ( now -> data.ptrvalue);
247                 break;
248             case GBPARSE_INT_ORDER :
249             case GBPARSE_INT_GROUP :
250                 len += 5;
251                 break;
252             case GBPARSE_INT_ONE_OF :
253             case GBPARSE_INT_ONE_OF_NUM:
254                 len += 6;
255                 break;
256             case GBPARSE_INT_REPLACE :
257                 len += 7;
258                 break;
259             case GBPARSE_INT_STRING:
260                 len += StringLen(now ->data.ptrvalue) + 1;
261                 break;
262             case GBPARSE_INT_UNKNOWN :
263             default:
264                 break;
265         }
266         len ++; /* for space */
267 
268 
269         if ( now == current)
270             break;
271     }
272 
273 
274     if (len > 0){
275         temp = retval = MemNew(len+1);
276         for ( now = head; now ; now = now -> next){
277             switch ( now-> choice){
278                 case GBPARSE_INT_JOIN :
279                     temp = StringMove(temp,"join");
280                     break;
281                 case GBPARSE_INT_COMPL :
282                     temp = StringMove(temp,"complement");
283                     break;
284                 case GBPARSE_INT_LEFT :
285                     temp = StringMove(temp,"(");
286                     break;
287                 case GBPARSE_INT_RIGHT :
288                     temp = StringMove(temp,")");
289                     break;
290                 case GBPARSE_INT_CARET :
291                     temp = StringMove(temp,"^");
292                     break;
293                 case GBPARSE_INT_DOT_DOT :
294                     temp = StringMove(temp,"..");
295                     break;
296                 case GBPARSE_INT_ACCESION :
297                 case GBPARSE_INT_NUMBER :
298                 case GBPARSE_INT_STRING:
299                     temp = StringMove(temp,now -> data.ptrvalue);
300                     break;
301                 case GBPARSE_INT_GT :
302                     temp = StringMove(temp,">");
303                     break;
304                 case GBPARSE_INT_LT :
305                     temp = StringMove(temp,"<");
306                     break;
307                 case GBPARSE_INT_COMMA :
308                     temp = StringMove(temp,",");
309                     break;
310                 case GBPARSE_INT_ORDER :
311                     temp = StringMove(temp,"order");
312                     break;
313                 case GBPARSE_INT_SINGLE_DOT :
314                     temp = StringMove(temp,".");
315                     break;
316                 case GBPARSE_INT_GROUP :
317                     temp = StringMove(temp,"group");
318                     break;
319                 case GBPARSE_INT_ONE_OF :
320                 case GBPARSE_INT_ONE_OF_NUM:
321                     temp = StringMove(temp,"one-of");
322                     break;
323                 case GBPARSE_INT_REPLACE :
324                     temp = StringMove(temp,"replace");
325                     break;
326                 case GBPARSE_INT_UNKNOWN :
327                 default:
328                     break;
329             }
330                 temp = StringMove(temp," ");
331             if ( now == current)
332                 break;
333         }
334     }
335 
336     return retval;
337 }
338 
339 /*--------- Nlm_find_one_of_num()------------*/
340 /*
341 
342 Consider these for locations:
343          misc_signal     join(57..one-of(67,75),one-of(100,110)..200)
344      misc_signal     join(57..one-of(67,75),one-of(100,110..120),200)
345      misc_signal     join(57..one-of(67,75),one-of(100,110..115)..200)
346 
347      misc_signal     join(57..one-of(67,75),one-of(100,110),200)
348 
349 In the first three, the one-of() is functioning as an alternative set
350 of numbers, in the last, as an alternative set of locations (even
351 though the locations are points).
352 [yes the one-of(100,110..115).. is illegal]
353 
354   here is one more case:one-of(18,30)..470 so if the location
355   starts with a one-of, it also needs to be checked.
356 
357 To deal with this, the GBPARSE_INT_ONE_OF token type will be changed
358 by the following function to GBPARSE_INT_ONE_OF_NUM, in the three cases.
359 
360 note that this change is not necessary in this case:
361         join(100..200,300..one-of(400,500)), as after a ".." token,
362     it has to be a number.
363 
364 */
365 
366 static void
Nlm_find_one_of_num(ValNodePtr head_token)367 Nlm_find_one_of_num(ValNodePtr head_token)
368 {
369     ValNodePtr current, scanner;
370 
371     current = head_token;
372     if (current -> choice == GBPARSE_INT_ONE_OF){
373             scanner= current -> next;
374 /*-------(is first token after ")" a ".."?----*/
375             for (;scanner!=NULL; scanner = scanner -> next){
376                 if (scanner -> choice == GBPARSE_INT_RIGHT){
377                     scanner = scanner -> next;
378                     if (scanner != NULL){
379                         if (scanner -> choice == GBPARSE_INT_DOT_DOT){
380 /*---- this is it ! ! */
381                             current -> choice = GBPARSE_INT_ONE_OF_NUM;
382                         }
383                     }
384                     break;
385                 }
386             }
387     }
388     for (current = head_token; current != NULL; current = current -> next){
389         if ( current -> choice == GBPARSE_INT_COMMA ||
390             current -> choice == GBPARSE_INT_LEFT ){
391             scanner= current -> next;
392             if ( scanner != NULL){
393                 if (scanner -> choice == GBPARSE_INT_ONE_OF){
394 /*-------(is first token after ")" a ".."?----*/
395                     for (;scanner!=NULL; scanner = scanner -> next){
396                         if (scanner -> choice == GBPARSE_INT_RIGHT){
397                             scanner = scanner -> next;
398                             if (scanner != NULL){
399                                 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
400 /*---- this is it ! ! */
401                                     current -> next -> choice
402                                         = GBPARSE_INT_ONE_OF_NUM;
403                                 }
404                             }
405                             break;
406                         }
407                     }
408                 }
409             }
410         }
411     }
412 
413 }
414 
415 /*---------- Nlm_gbparseint()-----*/
416 
417 NLM_EXTERN SeqLocPtr
Nlm_gbparseint(CharPtr raw_intervals,Boolean PNTR keep_rawPt,Boolean PNTR sitesPt,int PNTR num_errsPt,SeqIdPtr seq_id)418 Nlm_gbparseint(CharPtr raw_intervals, Boolean PNTR keep_rawPt, Boolean PNTR sitesPt, int PNTR num_errsPt, SeqIdPtr seq_id)
419 {
420     SeqLocPtr retval = NULL;
421     ValNodePtr head_token, current_token;
422     int paren_count = 0;
423     Boolean go_again;
424 
425     * keep_rawPt = FALSE;
426     * sitesPt = FALSE;
427 
428     head_token = NULL;
429     (*num_errsPt) = gbparselex(raw_intervals, & head_token);
430 
431     if (head_token == NULL) {
432         *num_errsPt = 1;
433         return NULL;
434     }
435     if ( ! (*num_errsPt)){
436         current_token = head_token;
437         Nlm_find_one_of_num(head_token);
438 
439     do {
440         go_again= FALSE;
441         if (current_token)
442         switch ( current_token -> choice){
443             case  GBPARSE_INT_JOIN : case  GBPARSE_INT_ORDER :
444             case  GBPARSE_INT_GROUP : case  GBPARSE_INT_ONE_OF :
445             case GBPARSE_INT_COMPL:
446             retval = Nlm_gbloc(keep_rawPt,  & paren_count, sitesPt, & current_token,
447                 head_token, (num_errsPt), seq_id);
448 /* need to check that out of tokens here */
449             retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
450                     head_token, retval, keep_rawPt,  paren_count);
451                 break;
452             case GBPARSE_INT_STRING:
453                 Nlm_gbparse_error("string in loc",
454                     head_token, current_token);
455                     * keep_rawPt = TRUE;  (* num_errsPt) ++;
456 /*  no break on purpose */
457             case  GBPARSE_INT_UNKNOWN :
458             default:
459             case  GBPARSE_INT_RIGHT :
460             case  GBPARSE_INT_DOT_DOT :
461             case  GBPARSE_INT_COMMA :
462             case  GBPARSE_INT_SINGLE_DOT :
463 
464                     Nlm_gbparse_error("illegal initial token",
465                         head_token, current_token);
466                         * keep_rawPt = TRUE;  (* num_errsPt) ++;
467                 current_token = current_token -> next;
468                 break;
469 
470             case  GBPARSE_INT_ACCESION :
471 /*--- no warn, but strange ---*/
472 /*-- no break on purpose ---*/
473 
474             case  GBPARSE_INT_CARET : case  GBPARSE_INT_GT :
475             case  GBPARSE_INT_LT : case  GBPARSE_INT_NUMBER :
476             case  GBPARSE_INT_LEFT :
477 
478             case GBPARSE_INT_ONE_OF_NUM:
479 
480             retval = Nlm_gbint(keep_rawPt,  & current_token,
481                 head_token, (num_errsPt), seq_id);
482 /* need to check that out of tokens here */
483             retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
484                     head_token, retval, keep_rawPt,  paren_count);
485                 break;
486 
487             case  GBPARSE_INT_REPLACE :
488             retval = Nlm_gbreplace(keep_rawPt,  & paren_count, sitesPt, & current_token,
489                 head_token, (num_errsPt), seq_id);
490                 * keep_rawPt = TRUE;
491 /*---all errors handled within this function ---*/
492                 break;
493             case GBPARSE_INT_SITES :
494                 * sitesPt = TRUE;
495                 go_again = TRUE;
496                 current_token = current_token -> next;
497                 break;
498         }
499     }while (go_again && current_token);
500     }else{
501         * keep_rawPt = TRUE;
502     }
503 
504     if ( head_token)
505         ValNodeFreeData(head_token);
506 
507     if ( (*num_errsPt)){
508         SeqLocFree(retval);
509         retval = NULL;
510     }
511     return retval;
512 }
513 
514 /*---------- Nlm_gbparseint_ver()-----*/
515 
Nlm_gbparseint_ver(CharPtr raw_intervals,Boolean PNTR keep_rawPt,Boolean PNTR sitesPt,int PNTR num_errsPt,SeqIdPtr seq_id,Boolean accver)516 NLM_EXTERN SeqLocPtr Nlm_gbparseint_ver(CharPtr raw_intervals,
517                                         Boolean PNTR keep_rawPt,
518                                         Boolean PNTR sitesPt,
519                                         int PNTR num_errsPt,
520                                         SeqIdPtr seq_id, Boolean accver)
521 {
522     SeqLocPtr retval = NULL;
523     ValNodePtr head_token, current_token;
524     int paren_count = 0;
525     Boolean go_again;
526 
527     * keep_rawPt = FALSE;
528     * sitesPt = FALSE;
529 
530     head_token = NULL;
531     (*num_errsPt) = Nlm_gbparselex_ver(raw_intervals, &head_token, accver);
532 
533     if (head_token == NULL) {
534         *num_errsPt = 1;
535         return NULL;
536     }
537     if ( ! (*num_errsPt)){
538         current_token = head_token;
539         Nlm_find_one_of_num(head_token);
540 
541     do {
542         go_again= FALSE;
543         if (current_token)
544         switch ( current_token -> choice){
545             case  GBPARSE_INT_JOIN : case  GBPARSE_INT_ORDER :
546             case  GBPARSE_INT_GROUP : case  GBPARSE_INT_ONE_OF :
547             case GBPARSE_INT_COMPL:
548             retval = Nlm_gbloc_ver(keep_rawPt,  & paren_count, sitesPt, & current_token,
549                 head_token, (num_errsPt), seq_id, accver);
550 /* need to check that out of tokens here */
551             retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
552                     head_token, retval, keep_rawPt,  paren_count);
553                 break;
554             case GBPARSE_INT_STRING:
555                 Nlm_gbparse_error("string in loc",
556                     head_token, current_token);
557                     * keep_rawPt = TRUE;  (* num_errsPt) ++;
558 /*  no break on purpose */
559             case  GBPARSE_INT_UNKNOWN :
560             default:
561             case  GBPARSE_INT_RIGHT :
562             case  GBPARSE_INT_DOT_DOT :
563             case  GBPARSE_INT_COMMA :
564             case  GBPARSE_INT_SINGLE_DOT :
565 
566                     Nlm_gbparse_error("illegal initial token",
567                         head_token, current_token);
568                         * keep_rawPt = TRUE;  (* num_errsPt) ++;
569                 current_token = current_token -> next;
570                 break;
571 
572             case  GBPARSE_INT_ACCESION :
573 /*--- no warn, but strange ---*/
574 /*-- no break on purpose ---*/
575 
576             case  GBPARSE_INT_CARET : case  GBPARSE_INT_GT :
577             case  GBPARSE_INT_LT : case  GBPARSE_INT_NUMBER :
578             case  GBPARSE_INT_LEFT :
579 
580             case GBPARSE_INT_ONE_OF_NUM:
581 
582             retval = Nlm_gbint_ver(keep_rawPt,  & current_token,
583                 head_token, (num_errsPt), seq_id, accver);
584 /* need to check that out of tokens here */
585             retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
586                     head_token, retval, keep_rawPt,  paren_count);
587                 break;
588 
589             case  GBPARSE_INT_REPLACE :
590             retval = Nlm_gbreplace_ver(keep_rawPt,  & paren_count, sitesPt, & current_token,
591                 head_token, (num_errsPt), seq_id, accver);
592                 * keep_rawPt = TRUE;
593 /*---all errors handled within this function ---*/
594                 break;
595             case GBPARSE_INT_SITES :
596                 * sitesPt = TRUE;
597                 go_again = TRUE;
598                 current_token = current_token -> next;
599                 break;
600         }
601     }while (go_again && current_token);
602     }else{
603         * keep_rawPt = TRUE;
604     }
605 
606     if ( head_token)
607         ValNodeFreeData(head_token);
608 
609     if ( (*num_errsPt)){
610         SeqLocFree(retval);
611         retval = NULL;
612     }
613     return retval;
614 }
615 
616 /*---------- Nlm_gbloc()-----*/
617 
618 NLM_EXTERN SeqLocPtr
Nlm_gbloc(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)619 Nlm_gbloc(Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
620 {
621     SeqLocPtr retval =NULL;
622     Boolean add_nulls=FALSE;
623     ValNodePtr current_token = * currentPt;
624     Boolean did_complement= FALSE;
625     Boolean go_again ;
626 
627     do {
628         go_again= FALSE;
629         switch ( current_token -> choice){
630             case  GBPARSE_INT_COMPL :
631             *currentPt = (* currentPt) -> next;
632             if ( (*currentPt) == NULL){
633                 Nlm_gbparse_error("unexpected end of usable tokens",
634                     head_token, *currentPt);
635                 * keep_rawPt = TRUE;  (* num_errPt) ++;
636                 goto FATAL;
637             }
638             if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
639                     Nlm_gbparse_error("Missing \'(\'", /* paran match  ) */
640                         head_token, * currentPt);
641                     * keep_rawPt = TRUE;  (* num_errPt) ++;
642                     goto FATAL;
643             }else{
644                 (*parenPt) ++; *currentPt = (* currentPt) -> next;
645                 if ( ! * currentPt){
646                         Nlm_gbparse_error("illegal null contents",
647                             head_token, *currentPt);
648                         * keep_rawPt = TRUE;  (* num_errPt) ++;
649                     goto FATAL;
650                 }else{
651                     if (  (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
652                         Nlm_gbparse_error("Premature \')\'",
653                             head_token, *currentPt);
654                         * keep_rawPt = TRUE;  (* num_errPt) ++;
655                         goto FATAL;
656                     }else{
657                         retval =  Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt,
658                             head_token, num_errPt,seq_id) ;
659                         SeqLocRevCmp ( retval);
660                         did_complement= TRUE;
661                         if ( * currentPt){
662                             if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
663                                     Nlm_gbparse_error("Missing \')\'",
664                                         head_token, *currentPt);
665                                     * keep_rawPt = TRUE;  (* num_errPt) ++;
666                                     goto FATAL;
667                             }else{
668                                 (*parenPt) --; *currentPt = (* currentPt) -> next;
669                             }
670                         }else{
671                             Nlm_gbparse_error("Missing \')\'",
672                                 head_token, *currentPt);
673                             * keep_rawPt = TRUE;  (* num_errPt) ++;
674                             goto FATAL;
675                         }
676                     }
677                 }
678             }
679                 break;
680 /* REAL LOCS */
681             case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
682             case  GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
683             case  GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
684                 break;
685             case  GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
686 
687 /* ERROR */
688             case GBPARSE_INT_STRING:
689                 Nlm_gbparse_error("string in loc",
690                     head_token, current_token);
691                     * keep_rawPt = TRUE;  (* num_errPt) ++;
692                     goto FATAL;
693 /*--- no break on purpose---*/
694             case  GBPARSE_INT_UNKNOWN : default:
695             case  GBPARSE_INT_RIGHT : case  GBPARSE_INT_DOT_DOT:case  GBPARSE_INT_COMMA :
696             case  GBPARSE_INT_SINGLE_DOT :
697                 Nlm_gbparse_error("illegal initial loc token",
698                     head_token, *currentPt);
699                 * keep_rawPt = TRUE;  (* num_errPt) ++;
700                     goto FATAL;
701 
702 /* Interval, occurs on recursion */
703             case GBPARSE_INT_GAP:
704                 Nlm_gbgap(currentPt, &retval, FALSE);
705                 break;
706             case GBPARSE_INT_UNK_GAP:
707                 Nlm_gbgap(currentPt, &retval, TRUE);
708                 break;
709             case  GBPARSE_INT_ACCESION :
710             case  GBPARSE_INT_CARET : case  GBPARSE_INT_GT :
711             case  GBPARSE_INT_LT : case  GBPARSE_INT_NUMBER :
712             case  GBPARSE_INT_LEFT :
713 
714             case GBPARSE_INT_ONE_OF_NUM:
715 
716             retval = Nlm_gbint(keep_rawPt, currentPt,
717                 head_token, num_errPt, seq_id);
718                 break;
719 
720             case  GBPARSE_INT_REPLACE :
721 /*-------illegal at this level --*/
722                 Nlm_gbparse_error("illegal replace",
723                     head_token, *currentPt);
724                 * keep_rawPt = TRUE;  (* num_errPt) ++;
725                     goto FATAL;
726             case GBPARSE_INT_SITES :
727                 * sitesPt = TRUE;
728                 go_again = TRUE;
729                 (*currentPt) = (*currentPt) -> next;
730                 break;
731         }
732     } while (go_again && *currentPt);
733 
734     if ( !  (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
735     if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
736             && ! did_complement){
737 /*--------
738  * ONLY THE CHOICE has been set. the "join", etc. only has been noted
739  *----*/
740         *currentPt = (* currentPt) -> next;
741         if ( ! * currentPt){
742                 Nlm_gbparse_error("unexpected end of interval tokens",
743                     head_token, *currentPt);
744                         * keep_rawPt = TRUE;  (* num_errPt) ++;
745                     goto FATAL;
746         }else{
747             if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
748                     Nlm_gbparse_error("Missing \'(\'",
749                         head_token, *currentPt); /* paran match  ) */
750                     * keep_rawPt = TRUE;  (* num_errPt) ++;
751                     goto FATAL;
752             }else{
753                 (*parenPt) ++; *currentPt = (* currentPt) -> next;
754                 if ( ! * currentPt){
755                         Nlm_gbparse_error("illegal null contents",
756                             head_token, *currentPt);
757                         * keep_rawPt = TRUE;  (* num_errPt) ++;
758                         goto FATAL;
759                 }else{
760                     if (  (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
761                         Nlm_gbparse_error("Premature \')\'" ,
762                             head_token, *currentPt);
763                         * keep_rawPt = TRUE;  (* num_errPt) ++;
764                     goto FATAL;
765                     }else{
766 
767                         ValNodePtr last= NULL, next_loc = NULL;
768 
769                         while ( ! *num_errPt && * currentPt){
770                             if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
771                                 while  ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
772                                     (*parenPt) --;
773                                     *currentPt = (* currentPt) -> next;
774                                     if ( ! *currentPt)
775                                         break;
776                                 }
777                                 break;
778                             }
779                             if ( ! * currentPt){
780                                 break;
781                             }
782                             next_loc = Nlm_gbloc(keep_rawPt, parenPt,sitesPt,
783                                 currentPt, head_token, num_errPt,
784                                 seq_id);
785                             if( retval -> data.ptrvalue == NULL)
786                                 retval -> data.ptrvalue = next_loc;
787                             if ( last)
788                                 last -> next = next_loc;
789                             last = next_loc;
790                             if ( ! * currentPt){
791                                 break;
792                             }
793                             if ( ! * currentPt){
794                                 break;
795                             }
796                             if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
797                                 break;
798                             }
799                             if (  (* currentPt) -> choice == GBPARSE_INT_COMMA){
800                                 *currentPt = (* currentPt) -> next;
801                                 if(add_nulls){
802                                     next_loc = ValNodeNew(last);
803                                     next_loc -> choice = SEQLOC_NULL;
804                                     last -> next = next_loc;
805                                     last = next_loc;
806                                 }
807                             }else{
808                                 Nlm_gbparse_error("Illegal token after interval",
809                                     head_token, *currentPt);
810                                 * keep_rawPt = TRUE;  (* num_errPt) ++;
811                                 goto FATAL;
812                             }
813                         }
814                     }
815                 }
816                 if ( (*currentPt) == NULL){
817                     Nlm_gbparse_error("unexpected end of usable tokens",
818                         head_token, *currentPt);
819                     * keep_rawPt = TRUE;  (* num_errPt) ++;
820                     goto FATAL;
821                 }else{
822                     if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
823                             Nlm_gbparse_error("Missing \')\'" /* paran match  ) */,
824                                      head_token, *currentPt);
825                             * keep_rawPt = TRUE;  (* num_errPt) ++;
826                         goto FATAL;
827                     }else{
828                         (*parenPt) --; *currentPt = (* currentPt) -> next;
829                     }
830                 }
831             }
832         }
833     }
834 
835 FATAL:
836     if ( (* num_errPt)){
837         if (retval){
838             SeqLocFree(retval);
839             retval =ValNodeNew(NULL);
840             retval -> choice = SEQLOC_WHOLE;
841             retval -> data.ptrvalue = SeqIdDup(seq_id);
842         }
843     }
844 
845     return retval;
846 }
847 
848 /*---------- Nlm_gbloc_ver()-----*/
849 
Nlm_gbloc_ver(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)850 NLM_EXTERN SeqLocPtr Nlm_gbloc_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
851                                Boolean PNTR sitesPt, ValNodePtr PNTR currentPt,
852                                ValNodePtr head_token, int PNTR num_errPt,
853                                SeqIdPtr seq_id, Boolean accver)
854 {
855     SeqLocPtr retval =NULL;
856     Boolean add_nulls=FALSE;
857     ValNodePtr current_token = * currentPt;
858     Boolean did_complement= FALSE;
859     Boolean go_again ;
860 
861     do {
862         go_again= FALSE;
863         switch ( current_token -> choice){
864             case  GBPARSE_INT_COMPL :
865             *currentPt = (* currentPt) -> next;
866             if ( (*currentPt) == NULL){
867                 Nlm_gbparse_error("unexpected end of usable tokens",
868                     head_token, *currentPt);
869                 * keep_rawPt = TRUE;  (* num_errPt) ++;
870                 goto FATAL;
871             }
872             if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
873                     Nlm_gbparse_error("Missing \'(\'", /* paran match  ) */
874                         head_token, * currentPt);
875                     * keep_rawPt = TRUE;  (* num_errPt) ++;
876                     goto FATAL;
877             }else{
878                 (*parenPt) ++; *currentPt = (* currentPt) -> next;
879                 if ( ! * currentPt){
880                         Nlm_gbparse_error("illegal null contents",
881                             head_token, *currentPt);
882                         * keep_rawPt = TRUE;  (* num_errPt) ++;
883                     goto FATAL;
884                 }else{
885                     if (  (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
886                         Nlm_gbparse_error("Premature \')\'",
887                             head_token, *currentPt);
888                         * keep_rawPt = TRUE;  (* num_errPt) ++;
889                         goto FATAL;
890                     }else{
891                         retval =  Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt,
892                             head_token, num_errPt,seq_id, accver) ;
893                         SeqLocRevCmp ( retval);
894                         did_complement= TRUE;
895                         if ( * currentPt){
896                             if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
897                                     Nlm_gbparse_error("Missing \')\'",
898                                         head_token, *currentPt);
899                                     * keep_rawPt = TRUE;  (* num_errPt) ++;
900                                     goto FATAL;
901                             }else{
902                                 (*parenPt) --; *currentPt = (* currentPt) -> next;
903                             }
904                         }else{
905                             Nlm_gbparse_error("Missing \')\'",
906                                 head_token, *currentPt);
907                             * keep_rawPt = TRUE;  (* num_errPt) ++;
908                             goto FATAL;
909                         }
910                     }
911                 }
912             }
913                 break;
914 /* REAL LOCS */
915             case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
916             case  GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
917             case  GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
918                 break;
919             case  GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
920 
921 /* ERROR */
922             case GBPARSE_INT_STRING:
923                 Nlm_gbparse_error("string in loc",
924                     head_token, current_token);
925                     * keep_rawPt = TRUE;  (* num_errPt) ++;
926                     goto FATAL;
927 /*--- no break on purpose---*/
928             case  GBPARSE_INT_UNKNOWN : default:
929             case  GBPARSE_INT_RIGHT : case  GBPARSE_INT_DOT_DOT:case  GBPARSE_INT_COMMA :
930             case  GBPARSE_INT_SINGLE_DOT :
931                 Nlm_gbparse_error("illegal initial loc token",
932                     head_token, *currentPt);
933                 * keep_rawPt = TRUE;  (* num_errPt) ++;
934                     goto FATAL;
935 
936 /* Interval, occurs on recursion */
937             case GBPARSE_INT_GAP:
938                 Nlm_gbgap(currentPt, &retval, FALSE);
939                 break;
940             case GBPARSE_INT_UNK_GAP:
941                 Nlm_gbgap(currentPt, &retval, TRUE);
942                 break;
943             case  GBPARSE_INT_ACCESION :
944             case  GBPARSE_INT_CARET : case  GBPARSE_INT_GT :
945             case  GBPARSE_INT_LT : case  GBPARSE_INT_NUMBER :
946             case  GBPARSE_INT_LEFT :
947 
948             case GBPARSE_INT_ONE_OF_NUM:
949 
950             retval = Nlm_gbint_ver(keep_rawPt, currentPt,
951                 head_token, num_errPt, seq_id, accver);
952                 break;
953 
954             case  GBPARSE_INT_REPLACE :
955 /*-------illegal at this level --*/
956                 Nlm_gbparse_error("illegal replace",
957                     head_token, *currentPt);
958                 * keep_rawPt = TRUE;  (* num_errPt) ++;
959                     goto FATAL;
960             case GBPARSE_INT_SITES :
961                 * sitesPt = TRUE;
962                 go_again = TRUE;
963                 (*currentPt) = (*currentPt) -> next;
964                 break;
965         }
966     } while (go_again && *currentPt);
967 
968     if ( !  (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
969     if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
970             && ! did_complement){
971 /*--------
972  * ONLY THE CHOICE has been set. the "join", etc. only has been noted
973  *----*/
974         *currentPt = (* currentPt) -> next;
975         if ( ! * currentPt){
976                 Nlm_gbparse_error("unexpected end of interval tokens",
977                     head_token, *currentPt);
978                         * keep_rawPt = TRUE;  (* num_errPt) ++;
979                     goto FATAL;
980         }else{
981             if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
982                     Nlm_gbparse_error("Missing \'(\'",
983                         head_token, *currentPt); /* paran match  ) */
984                     * keep_rawPt = TRUE;  (* num_errPt) ++;
985                     goto FATAL;
986             }else{
987                 (*parenPt) ++; *currentPt = (* currentPt) -> next;
988                 if ( ! * currentPt){
989                         Nlm_gbparse_error("illegal null contents",
990                             head_token, *currentPt);
991                         * keep_rawPt = TRUE;  (* num_errPt) ++;
992                         goto FATAL;
993                 }else{
994                     if (  (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
995                         Nlm_gbparse_error("Premature \')\'" ,
996                             head_token, *currentPt);
997                         * keep_rawPt = TRUE;  (* num_errPt) ++;
998                     goto FATAL;
999                     }else{
1000 
1001                         ValNodePtr last= NULL, next_loc = NULL;
1002 
1003                         while ( ! *num_errPt && * currentPt){
1004                             if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1005                                 while  ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
1006                                     (*parenPt) --;
1007                                     *currentPt = (* currentPt) -> next;
1008                                     if ( ! *currentPt)
1009                                         break;
1010                                 }
1011                                 break;
1012                             }
1013                             if ( ! * currentPt){
1014                                 break;
1015                             }
1016                             next_loc = Nlm_gbloc_ver(keep_rawPt, parenPt,sitesPt,
1017                                 currentPt, head_token, num_errPt,
1018                                 seq_id, accver);
1019                             if( retval -> data.ptrvalue == NULL)
1020                                 retval -> data.ptrvalue = next_loc;
1021                             if ( last)
1022                                 last -> next = next_loc;
1023                             last = next_loc;
1024                             if ( ! * currentPt){
1025                                 break;
1026                             }
1027                             if ( ! * currentPt){
1028                                 break;
1029                             }
1030                             if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
1031                                 break;
1032                             }
1033                             if (  (* currentPt) -> choice == GBPARSE_INT_COMMA){
1034                                 *currentPt = (* currentPt) -> next;
1035                                 if(add_nulls){
1036                                     next_loc = ValNodeNew(last);
1037                                     next_loc -> choice = SEQLOC_NULL;
1038                                     last -> next = next_loc;
1039                                     last = next_loc;
1040                                 }
1041                             }else{
1042                                 Nlm_gbparse_error("Illegal token after interval",
1043                                     head_token, *currentPt);
1044                                 * keep_rawPt = TRUE;  (* num_errPt) ++;
1045                                 goto FATAL;
1046                             }
1047                         }
1048                     }
1049                 }
1050                 if ( (*currentPt) == NULL){
1051                     Nlm_gbparse_error("unexpected end of usable tokens",
1052                         head_token, *currentPt);
1053                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1054                     goto FATAL;
1055                 }else{
1056                     if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
1057                             Nlm_gbparse_error("Missing \')\'" /* paran match  ) */,
1058                                      head_token, *currentPt);
1059                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1060                         goto FATAL;
1061                     }else{
1062                         (*parenPt) --; *currentPt = (* currentPt) -> next;
1063                     }
1064                 }
1065             }
1066         }
1067     }
1068 
1069 FATAL:
1070     if ( (* num_errPt)){
1071         if (retval){
1072             SeqLocFree(retval);
1073             retval =ValNodeNew(NULL);
1074             retval -> choice = SEQLOC_WHOLE;
1075             retval -> data.ptrvalue = SeqIdDup(seq_id);
1076         }
1077     }
1078 
1079     return retval;
1080 }
1081 
1082 /**********************************************************/
/*
 * Nlm_gbgap()
 *
 * Consumes a gap expression whose keyword token is at *currentPt.
 * Two token shapes are accepted after the keyword:
 *   "(" ")"           -> a new SEQLOC_NULL node is appended via
 *                        ValNodeNew(*retval) and stored in *retval;
 *   "(" NUMBER ")"    -> GapToSeqLocEx(number, unknown) builds the
 *                        location; it becomes *retval, or is linked as
 *                        (*retval)->next and made the new *retval when
 *                        *retval was already set.
 * On success *currentPt is moved past the closing ")".  For any other
 * token shape, or if GapToSeqLocEx() returns NULL, the function returns
 * without touching *currentPt or *retval.
 */
void Nlm_gbgap(ValNodePtr PNTR currentPt, ValNodePtr PNTR retval,
               Boolean unknown)
{
    ValNodePtr open_tok;
    ValNodePtr arg_tok;
    ValNodePtr close_tok;
    SeqLocPtr  gap_loc;
    int        to_skip;

    open_tok = (*currentPt)->next;
    if (open_tok == NULL || open_tok->choice != GBPARSE_INT_LEFT) {
        return;
    }

    arg_tok = open_tok->next;
    if (arg_tok == NULL) {
        return;
    }

    if (arg_tok->choice == GBPARSE_INT_RIGHT) {
        /* "gap()" : keyword, "(" and ")" are consumed */
        *retval = ValNodeNew(*retval);
        (*retval)->choice = SEQLOC_NULL;
        to_skip = 3;
    } else if (arg_tok->choice == GBPARSE_INT_NUMBER) {
        close_tok = arg_tok->next;
        if (close_tok == NULL || close_tok->choice != GBPARSE_INT_RIGHT) {
            return;
        }

        gap_loc = GapToSeqLocEx(atoi((CharPtr) arg_tok->data.ptrvalue),
                                unknown);
        if (gap_loc == NULL) {
            return;
        }

        if (*retval != NULL) {
            (*retval)->next = gap_loc;
        }
        *retval = gap_loc;

        /* "gap(N)" : keyword, "(", number and ")" are consumed */
        to_skip = 4;
    } else {
        return;
    }

    while (to_skip > 0) {
        *currentPt = (*currentPt)->next;
        to_skip--;
    }
}
1130 
1131 /*--------------- Nlm_gbint ()--------------------*/
1132 
1133 NLM_EXTERN SeqLocPtr /* sometimes returns points */
1134 
Nlm_gbint(Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)1135 Nlm_gbint(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1136 {
1137     SeqLocPtr retnode = ValNodeNew(NULL);
1138     SeqIntPtr retint = SeqIntNew();
1139     TextSeqIdPtr tp;
1140     IntFuzzPtr fuzz=NULL;
1141     SeqIdPtr idp = NULL;
1142     Boolean took_choice=FALSE;
1143 
1144     retnode -> choice = SEQLOC_INT;
1145 
1146         if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1147             idp = ValNodeNew(NULL);
1148             if (seq_id){
1149                 if (
1150                     seq_id -> choice == SEQID_GENBANK
1151                      || seq_id -> choice == SEQID_EMBL
1152                      || seq_id -> choice == SEQID_DDBJ
1153                 ){
1154                     idp -> choice = seq_id -> choice;
1155                     took_choice = TRUE;
1156                 }
1157             }
1158             if (! took_choice){
1159                 idp -> choice = SEQID_GENBANK;
1160             }
1161             tp = TextSeqIdNew();
1162             idp -> data.ptrvalue = tp;
1163             tp -> accession = StringSave ( (* currentPt) ->data.ptrvalue);
1164              *currentPt  =  (* currentPt)  -> next;
1165             if ( !  *currentPt ){
1166                     Nlm_gbparse_error("Nothing after accession",
1167                         head_token, *currentPt);
1168                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1169 
1170                     SeqIdFree(idp);
1171                     idp = NULL;
1172 
1173                     goto FATAL;
1174             }
1175         }else{
1176             idp = SeqIdDup (seq_id);
1177         }
1178         if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1179                 fuzz = IntFuzzNew();
1180                 fuzz -> choice = 4;
1181                 fuzz ->a = 2;
1182              *currentPt  =  (* currentPt)  -> next;
1183             if ( !  *currentPt ){
1184                     Nlm_gbparse_error("Nothing after \'<\'",
1185                         head_token, *currentPt);
1186                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1187                     goto FATAL;
1188             }
1189         }
1190         if ( ! (* num_errPt))
1191         switch ( (*currentPt ) -> choice){
1192             case  GBPARSE_INT_ACCESION :
1193                 if ( idp){
1194                     Nlm_gbparse_error("duplicate accessions",
1195                         head_token, *currentPt);
1196                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1197                     goto FATAL;
1198                 }
1199                 break;
1200             case  GBPARSE_INT_CARET :
1201                     Nlm_gbparse_error("caret (^) before number" ,
1202                         head_token, *currentPt);
1203                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1204                     goto FATAL;
1205             case  GBPARSE_INT_LT :
1206                 if ( idp){
1207                     Nlm_gbparse_error("duplicate \'<\'",
1208                         head_token, *currentPt);
1209                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1210                     goto FATAL;
1211                 }
1212                 break;
1213             case  GBPARSE_INT_GT :
1214             case  GBPARSE_INT_NUMBER :
1215             case  GBPARSE_INT_LEFT :
1216 
1217             case GBPARSE_INT_ONE_OF_NUM:
1218 
1219                 retint -> if_from = fuzz;
1220                 retint -> id = idp;
1221                 retnode -> data.ptrvalue = retint;
1222                 Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1223                     keep_rawPt, currentPt, head_token,
1224                     num_errPt,TAKE_FIRST);
1225                 Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1226         if ( ! (* num_errPt) ){
1227         if ( * currentPt){
1228                 Boolean in_caret = FALSE;
1229         switch ( (*currentPt ) -> choice){
1230                 SeqPntPtr point;
1231 
1232                 default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1233                 case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1234                 case GBPARSE_INT_ACCESION:
1235                     Nlm_gbparse_error("problem with 2nd number",
1236                         head_token, *currentPt);;
1237                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1238                     goto FATAL;
1239                 case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1240 /*--------------but have a point, not an interval----*/
1241                     Nlm_gbpintpnt(retnode, & retint);
1242                     break;
1243                 case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1244                     Nlm_gbparse_error("Missing \'..\'",
1245                         head_token, *currentPt);;
1246                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1247                     goto FATAL;
1248                 case GBPARSE_INT_CARET:
1249                 if (retint -> if_from){
1250                     Nlm_gbparse_error("\'<\' then \'^\'",
1251                     head_token, *currentPt);
1252                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1253                     goto FATAL;
1254                 }
1255                 retint -> if_from = IntFuzzNew();
1256                 retint -> if_from -> choice = 4;
1257                 retint -> if_from ->a = 4;
1258                 retint -> if_to = IntFuzzNew();
1259                 retint -> if_to -> choice = 4;
1260                 retint -> if_to ->a = 4;
1261                 in_caret = TRUE;
1262 /*---no break on purpose ---*/
1263                 case GBPARSE_INT_DOT_DOT:
1264              *currentPt  =  (* currentPt)  -> next;
1265             if ( (*currentPt) == NULL){
1266                 Nlm_gbparse_error("unexpected end of usable tokens",
1267                     head_token, *currentPt);
1268                 * keep_rawPt = TRUE;  (* num_errPt) ++;
1269                 goto FATAL;
1270             }
1271 /*--no break on purpose here ---*/
1272                 case GBPARSE_INT_NUMBER:
1273                 case GBPARSE_INT_LEFT:
1274 
1275                 case GBPARSE_INT_ONE_OF_NUM:  /* unlikely, but ok */
1276 
1277                 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1278                         if (retint -> if_from){
1279                             Nlm_gbparse_error("\'^\' then \'>\'",
1280                                 head_token, *currentPt);
1281                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1282                             goto FATAL;
1283                         }
1284                 }
1285                 Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1286                     keep_rawPt, currentPt, head_token,
1287                     num_errPt, TAKE_SECOND);
1288                 Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1289 /*----------
1290  *  The caret location implies a place (point) between two location.
1291  *  This is not exactly captured by the ASN.1, but pretty close
1292  *-------*/
1293                 if (in_caret){
1294                     Int4 to = retint -> to;
1295 
1296                     point = Nlm_gbpintpnt(retnode, & retint);
1297                     if ( point -> point +1 == to){
1298                         point -> point = to; /* was essentailly correct */
1299                     }else{
1300                         point -> fuzz -> choice = 2; /* range */
1301                         point -> fuzz -> a = to; /* max */
1302                         point -> fuzz ->b = point -> point;
1303                     }
1304                 }
1305                 if (retint != NULL)
1306                 if (retint -> from == retint -> to &&
1307                         ! retint -> if_from &&
1308                         ! retint -> if_to){
1309 /*-------if interval really a point, make is so ----*/
1310                     Nlm_gbpintpnt(retnode, & retint);
1311                 }
1312                 } /* end switch */
1313                 }else{
1314                     Nlm_gbpintpnt(retnode, & retint);
1315                 }
1316             }else{
1317                 goto FATAL;
1318             }
1319                 break;
1320             default:
1321                     Nlm_gbparse_error("No number when expected",
1322                         head_token, *currentPt);
1323                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1324                     goto FATAL;
1325 
1326         }
1327 
1328 
1329 RETURN:
1330         return retnode;
1331 
1332 FATAL:
1333         if (retint && (* num_errPt)){
1334             SeqIntFree(retint);
1335             retint = NULL;
1336         }
1337         ValNodeFree(retnode);
1338         retnode = NULL;
1339         goto RETURN;
1340 }
1341 
1342 /*--------------- Nlm_gbint_ver ()--------------------*/
1343 
1344 NLM_EXTERN SeqLocPtr /* sometimes returns points */
1345 
Nlm_gbint_ver(Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)1346 Nlm_gbint_ver(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt,
1347           ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id,
1348           Boolean accver)
1349 {
1350     SeqLocPtr retnode = ValNodeNew(NULL);
1351     SeqIntPtr retint = SeqIntNew();
1352     TextSeqIdPtr tp;
1353     IntFuzzPtr fuzz=NULL;
1354     SeqIdPtr idp = NULL;
1355     Boolean took_choice=FALSE;
1356     CharPtr p;
1357 
1358     retnode -> choice = SEQLOC_INT;
1359 
1360         if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1361             idp = ValNodeNew(NULL);
1362             if (seq_id){
1363                 if (
1364                     seq_id -> choice == SEQID_GENBANK
1365                      || seq_id -> choice == SEQID_EMBL
1366                      || seq_id -> choice == SEQID_DDBJ
1367                 ){
1368                     idp -> choice = seq_id -> choice;
1369                     took_choice = TRUE;
1370                 }
1371             }
1372             if (! took_choice){
1373                 idp -> choice = SEQID_GENBANK;
1374             }
1375             tp = TextSeqIdNew();
1376             idp -> data.ptrvalue = tp;
1377             if(accver == FALSE)
1378             {
1379                 tp->accession = StringSave((*currentPt)->data.ptrvalue);
1380             }
1381             else
1382             {
1383                 p = StringChr((*currentPt)->data.ptrvalue, '.');
1384                 if(p == NULL)
1385                 {
1386                     tp->accession = StringSave((*currentPt)->data.ptrvalue);
1387                     Nlm_gbparse_error("Missing accession's version",
1388                               head_token, *currentPt);
1389                 }
1390                 else
1391                 {
1392                     *p = '\0';
1393                     tp->accession = StringSave((*currentPt)->data.ptrvalue);
1394                     tp->version = atoi(p + 1);
1395                     *p = '.';
1396                 }
1397             }
1398             *currentPt  =  (* currentPt)  -> next;
1399             if ( !  *currentPt ){
1400                     Nlm_gbparse_error("Nothing after accession",
1401                         head_token, *currentPt);
1402                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1403 
1404                     SeqIdFree(idp);
1405                     idp = NULL;
1406 
1407                     goto FATAL;
1408             }
1409         }else{
1410             idp = SeqIdDup (seq_id);
1411         }
1412         if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1413                 fuzz = IntFuzzNew();
1414                 fuzz -> choice = 4;
1415                 fuzz ->a = 2;
1416              *currentPt  =  (* currentPt)  -> next;
1417             if ( !  *currentPt ){
1418                     Nlm_gbparse_error("Nothing after \'<\'",
1419                         head_token, *currentPt);
1420                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1421                     goto FATAL;
1422             }
1423         }
1424         if ( ! (* num_errPt))
1425         switch ( (*currentPt ) -> choice){
1426             case  GBPARSE_INT_ACCESION :
1427                 if ( idp){
1428                     Nlm_gbparse_error("duplicate accessions",
1429                         head_token, *currentPt);
1430                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1431                     goto FATAL;
1432                 }
1433                 break;
1434             case  GBPARSE_INT_CARET :
1435                     Nlm_gbparse_error("caret (^) before number" ,
1436                         head_token, *currentPt);
1437                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1438                     goto FATAL;
1439             case  GBPARSE_INT_LT :
1440                 if ( idp){
1441                     Nlm_gbparse_error("duplicate \'<\'",
1442                         head_token, *currentPt);
1443                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1444                     goto FATAL;
1445                 }
1446                 break;
1447             case  GBPARSE_INT_GT :
1448             case  GBPARSE_INT_NUMBER :
1449             case  GBPARSE_INT_LEFT :
1450 
1451             case GBPARSE_INT_ONE_OF_NUM:
1452 
1453                 retint -> if_from = fuzz;
1454                 retint -> id = idp;
1455                 retnode -> data.ptrvalue = retint;
1456                 Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1457                     keep_rawPt, currentPt, head_token,
1458                     num_errPt,TAKE_FIRST);
1459                 Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1460         if ( ! (* num_errPt) ){
1461         if ( * currentPt){
1462                 Boolean in_caret = FALSE;
1463         switch ( (*currentPt ) -> choice){
1464                 SeqPntPtr point;
1465 
1466                 default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1467                 case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1468                 case GBPARSE_INT_ACCESION:
1469                     Nlm_gbparse_error("problem with 2nd number",
1470                         head_token, *currentPt);;
1471                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1472                     goto FATAL;
1473                 case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1474 /*--------------but have a point, not an interval----*/
1475                     Nlm_gbpintpnt(retnode, & retint);
1476                     break;
1477                 case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1478                     Nlm_gbparse_error("Missing \'..\'",
1479                         head_token, *currentPt);;
1480                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1481                     goto FATAL;
1482                 case GBPARSE_INT_CARET:
1483                 if (retint -> if_from){
1484                     Nlm_gbparse_error("\'<\' then \'^\'",
1485                     head_token, *currentPt);
1486                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1487                     goto FATAL;
1488                 }
1489                 retint -> if_from = IntFuzzNew();
1490                 retint -> if_from -> choice = 4;
1491                 retint -> if_from ->a = 4;
1492                 retint -> if_to = IntFuzzNew();
1493                 retint -> if_to -> choice = 4;
1494                 retint -> if_to ->a = 4;
1495                 in_caret = TRUE;
1496 /*---no break on purpose ---*/
1497                 case GBPARSE_INT_DOT_DOT:
1498              *currentPt  =  (* currentPt)  -> next;
1499             if ( (*currentPt) == NULL){
1500                 Nlm_gbparse_error("unexpected end of usable tokens",
1501                     head_token, *currentPt);
1502                 * keep_rawPt = TRUE;  (* num_errPt) ++;
1503                 goto FATAL;
1504             }
1505 /*--no break on purpose here ---*/
1506                 case GBPARSE_INT_NUMBER:
1507                 case GBPARSE_INT_LEFT:
1508 
1509                 case GBPARSE_INT_ONE_OF_NUM:  /* unlikely, but ok */
1510 
1511                 if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1512                         if (retint -> if_from){
1513                             Nlm_gbparse_error("\'^\' then \'>\'",
1514                                 head_token, *currentPt);
1515                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1516                             goto FATAL;
1517                         }
1518                 }
1519                 Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1520                     keep_rawPt, currentPt, head_token,
1521                     num_errPt, TAKE_SECOND);
1522                 Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1523 /*----------
1524  *  The caret location implies a place (point) between two location.
1525  *  This is not exactly captured by the ASN.1, but pretty close
1526  *-------*/
1527                 if (in_caret){
1528                     Int4 to = retint -> to;
1529 
1530                     point = Nlm_gbpintpnt(retnode, & retint);
1531                     if ( point -> point +1 == to){
1532                         point -> point = to; /* was essentailly correct */
1533                     }else{
1534                         point -> fuzz -> choice = 2; /* range */
1535                         point -> fuzz -> a = to; /* max */
1536                         point -> fuzz ->b = point -> point;
1537                     }
1538                 }
1539                 if (retint != NULL)
1540                 if (retint -> from == retint -> to &&
1541                         ! retint -> if_from &&
1542                         ! retint -> if_to){
1543 /*-------if interval really a point, make is so ----*/
1544                     Nlm_gbpintpnt(retnode, & retint);
1545                 }
1546                 } /* end switch */
1547                 }else{
1548                     Nlm_gbpintpnt(retnode, & retint);
1549                 }
1550             }else{
1551                 goto FATAL;
1552             }
1553                 break;
1554             default:
1555                     Nlm_gbparse_error("No number when expected",
1556                         head_token, *currentPt);
1557                     * keep_rawPt = TRUE;  (* num_errPt) ++;
1558                     goto FATAL;
1559 
1560         }
1561 
1562 
1563 RETURN:
1564         return retnode;
1565 
1566 FATAL:
1567         if (retint && (* num_errPt)){
1568             SeqIntFree(retint);
1569             retint = NULL;
1570         }
1571         ValNodeFree(retnode);
1572         retnode = NULL;
1573         goto RETURN;
1574 }
1575 
1576 /*------------------- Nlm_gbpintpnt()-----------*/
1577 
1578 NLM_EXTERN SeqPntPtr
Nlm_gbpintpnt(SeqLocPtr retnode,SeqIntPtr PNTR retintPt)1579 Nlm_gbpintpnt(SeqLocPtr retnode, SeqIntPtr PNTR retintPt)
1580 {
1581     SeqPntPtr point;
1582         point = SeqPntNew();
1583         point -> point = (*retintPt) -> from;
1584         point -> id = (*retintPt) -> id;
1585         (*retintPt) -> id = NULL;
1586         point -> fuzz = (*retintPt) -> if_from;
1587         (*retintPt) -> if_from = NULL;
1588         SeqIntFree((*retintPt));
1589         (*retintPt) = NULL;
1590         retnode -> choice = SEQLOC_PNT;
1591         retnode -> data.ptrvalue = point;
1592     return point;
1593 }
1594 
1595 /*----- Nlm_gbload_number() -----*/
1596 
1597 NLM_EXTERN void
Nlm_gbload_number(Int4 PNTR numPt,IntFuzzPtr PNTR fuzzPt,Boolean PNTR keep_rawPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,int take_which)1598 Nlm_gbload_number (Int4 PNTR numPt, IntFuzzPtr PNTR fuzzPt, Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, int take_which)
1599 {
1600     int num_found=0;
1601     int fuzz_err =0;
1602     Boolean strange_sin_dot = FALSE;
1603 
1604         if ((*currentPt ) -> choice == GBPARSE_INT_CARET){
1605             Nlm_gbparse_error("duplicate carets",
1606                     head_token, *currentPt);
1607             (*keep_rawPt) = TRUE; (*num_errPt) ++;
1608              *currentPt  =  (* currentPt)  -> next;
1609             fuzz_err = 1;
1610         }else if ((*currentPt ) -> choice == GBPARSE_INT_GT ||
1611                 (*currentPt ) -> choice == GBPARSE_INT_LT){
1612             if ( ! * fuzzPt){
1613                 * fuzzPt = IntFuzzNew();
1614             }
1615             (* fuzzPt) -> choice = 4;
1616             if ((*currentPt ) -> choice == GBPARSE_INT_GT ){
1617                 (* fuzzPt) -> a = 1; /* 'a' serves as "lim" for choice 4 */
1618             }else{
1619                 (* fuzzPt) -> a = 2;
1620             }
1621              *currentPt  =  (* currentPt)  -> next;
1622         }else if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1623             strange_sin_dot = TRUE;
1624              *currentPt  =  (* currentPt)  -> next;
1625                 if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1626                     if ( ! * fuzzPt){
1627                         * fuzzPt = IntFuzzNew();
1628                     }
1629                     (* fuzzPt)  -> b  = atoi((*currentPt ) -> data.ptrvalue)-1;
1630                     (* fuzzPt) -> choice = 2;
1631                     if ( take_which == TAKE_FIRST ){
1632                         * numPt = (* fuzzPt)  -> b;
1633                     }
1634                      *currentPt  =  (* currentPt)  -> next;
1635                     num_found=1;
1636                 }else{
1637                     fuzz_err =1;
1638                 }
1639                 if ((*currentPt ) -> choice != GBPARSE_INT_SINGLE_DOT ){
1640                     fuzz_err =1;
1641                 }else{
1642                      *currentPt  =  (* currentPt)  -> next;
1643                     if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1644                         (* fuzzPt)  -> a  = atoi((*currentPt ) -> data.ptrvalue)-1;
1645                         if ( take_which ==  TAKE_SECOND ){
1646                             * numPt = (* fuzzPt)  -> a;
1647                         }
1648                          *currentPt  =  (* currentPt)  -> next;
1649                     }else{
1650                         fuzz_err =1;
1651                     }
1652                     if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1653                          *currentPt  =  (* currentPt)  -> next;
1654                     }else{
1655                         fuzz_err =1;
1656                     }
1657                 }
1658 
1659         }else if ((*currentPt ) -> choice != GBPARSE_INT_NUMBER) {
1660 /* this prevents endless cycling, unconditionally */
1661         if ((*currentPt ) -> choice != GBPARSE_INT_ONE_OF
1662             && (*currentPt ) -> choice !=  GBPARSE_INT_ONE_OF_NUM)
1663              *currentPt  =  (* currentPt)  -> next;
1664             num_found = -1;
1665         }
1666 
1667         if ( ! strange_sin_dot){
1668             if ( ! * currentPt){
1669                     Nlm_gbparse_error("unexpected end of interval tokens",
1670                         head_token, *currentPt);
1671                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1672             }else{
1673                  if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1674                     * numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1675                      *currentPt  =  (* currentPt)  -> next;
1676                     num_found=1;
1677                 }
1678             }
1679         }
1680 
1681     if ( fuzz_err){
1682                     Nlm_gbparse_error("Incorrect uncertainty",
1683                         head_token, *currentPt);
1684                     (*keep_rawPt) = TRUE; (*num_errPt) ++;
1685     }
1686     if ( num_found != 1){
1687                     (*keep_rawPt) = TRUE;
1688 /****************
1689  *
1690  *  10..one-of(13,15) type syntax here
1691  *
1692  ***************/
1693         if ((*currentPt ) -> choice == GBPARSE_INT_ONE_OF
1694                 || (*currentPt ) -> choice == GBPARSE_INT_ONE_OF_NUM){
1695             Boolean one_of_ok = TRUE;
1696             Boolean at_end_one_of = FALSE;
1697 
1698             *currentPt  =  (* currentPt)  -> next;
1699             if ((*currentPt ) -> choice != GBPARSE_INT_LEFT){
1700                 one_of_ok = FALSE;
1701             }else{
1702                 *currentPt  =  (* currentPt)  -> next;
1703             }
1704             if (one_of_ok &&  (*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1705                     * numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1706                      *currentPt  =  (* currentPt)  -> next;
1707             }else{
1708                 one_of_ok = FALSE;
1709             }
1710             while  (one_of_ok && ! at_end_one_of &&  *currentPt != NULL){
1711                 switch ( (*currentPt ) -> choice){
1712                     default:
1713                         one_of_ok = FALSE;
1714                         break;
1715                     case GBPARSE_INT_COMMA:
1716                     case GBPARSE_INT_NUMBER:
1717                      *currentPt  =  (* currentPt)  -> next;
1718                     break;
1719                     case GBPARSE_INT_RIGHT:
1720                      *currentPt  =  (* currentPt)  -> next;
1721                     at_end_one_of = TRUE;
1722                     break;
1723                 }
1724             }
1725             if ( ! one_of_ok && ! at_end_one_of){
1726                 while (! at_end_one_of && *currentPt != NULL){
1727                     if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1728                         at_end_one_of = TRUE;
1729                     }
1730                  *currentPt  =  (* currentPt)  -> next;
1731                 }
1732             }
1733 
1734             if ( ! one_of_ok){
1735                 Nlm_gbparse_error("bad one-of() syntax as number",
1736                     head_token, *currentPt);
1737                 (*num_errPt) ++;
1738             }
1739         }else{
1740             Nlm_gbparse_error("Number not found when expected",
1741                 head_token, *currentPt);
1742             (*num_errPt) ++;
1743         }
1744     }
1745 }
1746 
1747 /*----------------- Nlm_gbparse_better_be_done()-------------*/
1748 NLM_EXTERN SeqLocPtr
Nlm_gbparse_better_be_done(int PNTR num_errsPt,ValNodePtr current_token,ValNodePtr head_token,SeqLocPtr ret_so_far,Boolean PNTR keep_rawPt,int paren_count)1749 Nlm_gbparse_better_be_done(int PNTR num_errsPt, ValNodePtr current_token, ValNodePtr head_token, SeqLocPtr ret_so_far, Boolean PNTR keep_rawPt, int paren_count)
1750 {
1751     SeqLocPtr retval = ret_so_far;
1752 
1753         if ( current_token)
1754         while (current_token -> choice == GBPARSE_INT_RIGHT){
1755             paren_count --;
1756             current_token =  current_token -> next;
1757             if ( ! current_token){
1758                 if ( paren_count){
1759                     char par_msg[40];
1760                     sprintf(par_msg, "mismatched parentheses (%d)", paren_count);
1761                     Nlm_gbparse_error(par_msg,
1762                     head_token, current_token);
1763                     *keep_rawPt = TRUE;
1764                     (*num_errsPt) ++;
1765                 }
1766                 break;
1767             }
1768         }
1769         if ( paren_count){
1770                     Nlm_gbparse_error("text after last legal right parenthesis",
1771                     head_token, current_token);
1772                     *keep_rawPt = TRUE;
1773                     (*num_errsPt) ++;
1774         }
1775 
1776         if (current_token){
1777             Nlm_gbparse_error("text after end",
1778                     head_token, current_token);
1779             *keep_rawPt = TRUE;
1780             (*num_errsPt) ++;
1781         }
1782     return retval;
1783 }
1784 
1785 /*-------- Nlm_gbreplace() --------*/
1786 
1787 NLM_EXTERN SeqLocPtr
Nlm_gbreplace(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id)1788 Nlm_gbreplace (Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1789 {
1790     SeqLocPtr retval = NULL;
1791 
1792         * keep_rawPt = TRUE;
1793          *currentPt  =  (* currentPt)  -> next;
1794 
1795         if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1796              *currentPt  =  (* currentPt)  -> next;
1797             retval = Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1798                 num_errPt,seq_id);
1799             if ( ! * currentPt){
1800                     Nlm_gbparse_error("unexpected end of interval tokens",
1801                         head_token, *currentPt);
1802                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1803             }else{
1804 
1805                 if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1806                         Nlm_gbparse_error("Missing comma after first location in replace",
1807                         head_token, *currentPt);
1808                         (* num_errPt) ++;
1809                 }
1810             }
1811         }else{
1812                     Nlm_gbparse_error("Missing \'(\'" /* paran match  ) */
1813                         , head_token, *currentPt);
1814                     (* num_errPt) ++;
1815         }
1816     return retval;
1817 }
1818 
1819 
1820 
1821 /*-------- Nlm_gbreplace_ver() --------*/
1822 
Nlm_gbreplace_ver(Boolean PNTR keep_rawPt,int PNTR parenPt,Boolean PNTR sitesPt,ValNodePtr PNTR currentPt,ValNodePtr head_token,int PNTR num_errPt,SeqIdPtr seq_id,Boolean accver)1823 NLM_EXTERN SeqLocPtr Nlm_gbreplace_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
1824                                    Boolean PNTR sitesPt,
1825                                    ValNodePtr PNTR currentPt,
1826                                    ValNodePtr head_token, int PNTR num_errPt,
1827                                    SeqIdPtr seq_id, Boolean accver)
1828 {
1829     SeqLocPtr retval = NULL;
1830 
1831         * keep_rawPt = TRUE;
1832          *currentPt  =  (* currentPt)  -> next;
1833 
1834         if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1835              *currentPt  =  (* currentPt)  -> next;
1836             retval = Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1837                 num_errPt,seq_id, accver);
1838             if ( ! * currentPt){
1839                     Nlm_gbparse_error("unexpected end of interval tokens",
1840                         head_token, *currentPt);
1841                             * keep_rawPt = TRUE;  (* num_errPt) ++;
1842             }else{
1843 
1844                 if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1845                         Nlm_gbparse_error("Missing comma after first location in replace",
1846                         head_token, *currentPt);
1847                         (* num_errPt) ++;
1848                 }
1849             }
1850         }else{
1851                     Nlm_gbparse_error("Missing \'(\'" /* paran match  ) */
1852                         , head_token, *currentPt);
1853                     (* num_errPt) ++;
1854         }
1855     return retval;
1856 }
1857 
1858 /**********************************************************/
/*
 * Return the length of the non-numeric prefix of an accession string.
 *
 *   - NULL or empty string: 0.
 *   - Two letters, '_', then zero or four more letters (total length 3
 *     or 7, e.g. "NM_" or "NZ_AAAA"): that total length.
 *   - Four letters, two digits and 'S' (the 4+2+S scaffold form): 7.
 *   - Otherwise the number of leading letters when that is 1, 2 or 4.
 *   - Anything else: 1.
 *
 * The scaffold form is only recognised after exactly four letters.
 * Returning 7 after a shorter run of letters (e.g. "AB12S") would tell
 * the caller to skip more characters than the string may contain.
 */
static int Nlm_gbparse_accprefix(CharPtr acc)
{
    CharPtr p;
    int     ret;

    if(acc == NULL || *acc == '\0')
        return(0);

    for(p = acc; IS_ALPHA(*p) != 0;)
        p++;
    ret = (int) (p - acc);
    if(*p == '_')
    {
        if(ret == 2)
        {
            for(p++; IS_ALPHA(*p) != 0;)
                p++;
            ret = (int) (p - acc);
            if(ret != 3 && ret != 7)
                ret = 1;
        }
        else
            ret = 1;
    }
    else if(ret == 4 &&
            p[0] >= '0' && p[0] <= '9' &&
            p[1] >= '0' && p[1] <= '9' && p[2] == 'S')
        ret = 7;        /* 4 letters + 2 digits + 'S' */
    else if(ret != 1 && ret != 2 && ret != 4)
        ret = 1;
    return(ret);
}
1890 
1891 
/* Holds the character that Nlm_lex_error_MACRO temporarily overwrites. */
char Saved_ch;

/*
 * Report a lexer error through Nlm_gbparse_error().
 *
 * Must be expanded where a character pointer `current_col` and a ValNode
 * `forerrmacro` are in scope.  When current_col is non-NULL and does not
 * point at the terminating NUL, the character right after it is replaced
 * by '\0' for the duration of the call -- presumably so that the reported
 * text stops at the offending character -- and restored afterwards.
 *
 * The expansion is a plain sequence of statements (no do/while wrapper)
 * that already ends in ';'.  Call sites therefore omit the trailing
 * semicolon and must sit inside a braced block.
 */
#define Nlm_lex_error_MACRO(msg)\
        if (current_col != NULL && *current_col){\
        Saved_ch = *(current_col +1);\
        *(current_col +1) = '\0';\
        }else{\
        Saved_ch='\0';\
        }\
        Nlm_gbparse_error(msg, & forerrmacro, & forerrmacro);\
        if (Saved_ch)\
        *(current_col +1) = Saved_ch;
1904 
1905 /*------------- gbparselex()-----------------------*/
1906 
1907 NLM_EXTERN int
Nlm_gbparselex(CharPtr linein,ValNodePtr PNTR lexed)1908 Nlm_gbparselex(CharPtr linein, ValNodePtr PNTR lexed)
1909 {
1910     CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
1911     int dex;
1912     int retval = 0, len;
1913     ValNodePtr current_token = NULL, last_token = NULL;
1914     Boolean skip_new_token=FALSE;
1915     Boolean die_now=FALSE;
1916     ValNode forerrmacro;
1917 
1918 
1919     if (linein == NULL) return retval;
1920 
1921     forerrmacro.choice =GBPARSE_INT_ACCESION ;
1922 
1923     if (*linein){
1924         len = StringLen(linein);
1925         line_use = MemNew(len + 1);
1926         StringCpy(line_use, linein);
1927         if ( * lexed){
1928                 Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex")
1929             ValNodeFree( * lexed);
1930             * lexed = NULL;
1931         }
1932         current_col = line_use ;
1933         forerrmacro.data.ptrvalue = line_use;
1934 /*---------
1935  *   Clear terminal white space
1936  *---------*/
1937         points_at_term_null = line_use + len;
1938         spare = points_at_term_null - 1;
1939         while (*spare == ' '  || *spare == '\n' || *spare == '\r' || *spare == '~') {
1940             *spare-- = '\0';
1941             points_at_term_null --;
1942         }
1943 
1944 
1945         while (current_col < points_at_term_null && ! die_now) {
1946             if ( ! skip_new_token){
1947                 last_token = current_token;
1948                 current_token = ValNodeNew(current_token);
1949                 if ( ! * lexed)
1950                     * lexed = current_token;
1951             }
1952             switch ( *current_col){
1953 
1954             case '\"':
1955                 skip_new_token = FALSE;
1956                 current_token -> choice = GBPARSE_INT_STRING;
1957                 for (spare = current_col +1; spare < points_at_term_null;
1958                         spare ++) {
1959                     if ( *spare == '\"'){
1960                         break;
1961                     }
1962                 }
1963                 if (spare >= points_at_term_null){
1964                         Nlm_lex_error_MACRO( "unterminated string")
1965                         retval ++;
1966                 }else{
1967                     len = spare-current_col + 1;
1968                     current_token -> data.ptrvalue =
1969                         MemNew(len +2);
1970                     StringNCpy(current_token -> data.ptrvalue,
1971                         current_col,len);
1972                     current_col += len;
1973                 }
1974                     break;
1975 /*------
1976  *  NUMBER
1977  *------*/
1978             case '0': case '1': case '2': case '3': case '4':
1979             case '5': case '6': case '7': case '8': case '9':
1980                 skip_new_token = FALSE;
1981                 current_token -> choice = GBPARSE_INT_NUMBER;
1982                 for (dex=0, spare = current_col; isdigit((int) *spare); spare ++){
1983                     dex ++ ;
1984                 }
1985                 current_token -> data.ptrvalue = MemNew(dex+1);
1986                 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
1987                 current_col += dex -1;
1988                 break;
1989 /*------
1990  *  JOIN
1991  *------*/
1992             case 'j':
1993                 skip_new_token = FALSE;
1994                 current_token -> choice = GBPARSE_INT_JOIN;
1995                 if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
1996                     Nlm_lex_error_MACRO( "\"join\" misspelled")
1997                     retval += 10;
1998                     for(;*current_col && *current_col != '('; current_col++)
1999                         ; /* vi match )   empty body*/
2000                     current_col -- ;  /* back up 'cause ++ follows */
2001                 }else{
2002                     current_col += 3;
2003                 }
2004                 break;
2005 
2006 /*------
2007  *  ORDER and ONE-OF
2008  *------*/
2009             case 'o':
2010                 skip_new_token = FALSE;
2011                 if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
2012                     if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
2013                     Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
2014                         retval ++;
2015                         for(;*current_col && *current_col != '('; current_col++)
2016                             ; /* vi match )   empty body*/
2017                         current_col -- ;  /* back up 'cause ++ follows */
2018                     }else{
2019                         current_token -> choice = GBPARSE_INT_ONE_OF ;
2020                         current_col += 5;
2021                     }
2022                 }else{
2023                     current_token -> choice = GBPARSE_INT_ORDER;
2024                     current_col += 4;
2025                 }
2026                 break;
2027 
2028 /*------
2029  *  REPLACE
2030  *------*/
2031             case 'r' :
2032                 skip_new_token = FALSE;
2033                 current_token -> choice = GBPARSE_INT_REPLACE ;
2034                 if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
2035                     Nlm_lex_error_MACRO( "\"replace\" misspelled")
2036                     retval ++;
2037                     for(;*current_col && *current_col != '('; current_col++)
2038                         ; /* vi match )   empty body*/
2039                     current_col -- ;  /* back up 'cause ++ follows */
2040                 }else{
2041                     current_col += 6;
2042                 }
2043                 break;
2044 
2045 /*------
2046  *  GAP or GROUP or GI
2047  *------*/
2048             case 'g':
2049                 skip_new_token = FALSE;
2050                 if(StringNCmp(current_col, "gap", 3) == 0 &&
2051                    (current_col[3] == '(' ||
2052                     current_col[3] == ' ' ||
2053                     current_col[3] == '\t' ||
2054                     current_col[3] == '\0'))
2055                 {
2056                     current_token->choice = GBPARSE_INT_GAP;
2057                     current_token->data.ptrvalue = MemNew(4);
2058                     StringCpy(current_token->data.ptrvalue, "gap");
2059                     if(StringNICmp(current_col + 3, "(unk", 4) == 0)
2060                     {
2061                     current_token->choice = GBPARSE_INT_UNK_GAP;
2062                     last_token = current_token;
2063                     current_token = ValNodeNew(current_token);
2064                     current_token->choice = GBPARSE_INT_LEFT;
2065                     current_col += 4;
2066                     }
2067                     current_col += 2;
2068                     break;
2069                 }
2070                 if(StringNCmp(current_col, "gi|", 3) == 0) {
2071                     current_token->choice = GBPARSE_INT_ACCESION;
2072                     current_col += 3;
2073                     for (; IS_DIGIT(*current_col); current_col++) ;
2074                     break;
2075                 }
2076                 current_token -> choice = GBPARSE_INT_GROUP;
2077                 if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2078                     Nlm_lex_error_MACRO("\"group\" misspelled")
2079                     retval ++;
2080                     for(;*current_col && *current_col != '('; current_col++)
2081                         ; /* vi match )   empty body*/
2082                     current_col -- ;  /* back up 'cause ++ follows */
2083                 }else{
2084                     current_col += 4;
2085                 }
2086                 break;
2087 
2088 /*------
2089  *  COMPLEMENT
2090  *------*/
2091             case 'c':
2092                 skip_new_token = FALSE;
2093                 current_token -> choice = GBPARSE_INT_COMPL;
2094                 if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2095                     Nlm_lex_error_MACRO("\"complement\" misspelled")
2096                     retval += 10;
2097                     for(;*current_col && *current_col != '('; current_col++)
2098                         ; /* vi match )   empty body*/
2099                     current_col -- ;  /* back up 'cause ++ follows */
2100                 }else{
2101                     current_col += 9;
2102                 }
2103                 break;
2104 
2105 /*-------
2106  * internal bases ignored
2107  *---------*/
2108             case 'b':
2109             if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2110                 goto ACCESSION;
2111             }else{
2112                 skip_new_token = TRUE;
2113                 current_col += 4;
2114             }
2115                 break;
2116 
2117 /*------
2118  *  ()^.,<>  (bases (sites
2119  *------*/
2120             case '(':
2121                 if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2122                     skip_new_token = FALSE;
2123                     current_token -> choice = GBPARSE_INT_JOIN;
2124                     current_col += 4;
2125                     if (*current_col != '\0')
2126                     if ( * (current_col +1) == 's')
2127                         current_col ++;
2128                     last_token = current_token;
2129                     current_token = ValNodeNew(current_token);
2130                     current_token -> choice = GBPARSE_INT_LEFT;
2131                 }else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2132                     skip_new_token = FALSE;
2133                     current_col += 5;
2134                     if (*current_col != '\0')
2135                     if ( * (current_col +1) == ')'){
2136                         current_col ++;
2137                         current_token -> choice = GBPARSE_INT_SITES;
2138                     }else{
2139                         current_token -> choice = GBPARSE_INT_SITES;
2140                         last_token = current_token;
2141                         current_token = ValNodeNew(current_token);
2142                         current_token -> choice = GBPARSE_INT_JOIN;
2143                         last_token = current_token;
2144                         current_token = ValNodeNew(current_token);
2145                         current_token -> choice = GBPARSE_INT_LEFT;
2146                         if (*current_col != '\0'){
2147                             if ( * (current_col +1) == ';'){
2148                                 current_col ++;
2149                             }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2150                                 current_col += 2;
2151                             }
2152                         }
2153                     }
2154                 }else{
2155                     skip_new_token = FALSE;
2156                     current_token -> choice = GBPARSE_INT_LEFT;
2157                 }
2158                 break;
2159 
2160             case ')':
2161                 skip_new_token = FALSE;
2162                 current_token -> choice = GBPARSE_INT_RIGHT;
2163 
2164                 break;
2165 
2166             case '^':
2167                 skip_new_token = FALSE;
2168                 current_token -> choice = GBPARSE_INT_CARET;
2169                 break;
2170 
2171                         case '-':
2172                 skip_new_token = FALSE;
2173                 current_token -> choice = GBPARSE_INT_DOT_DOT ;
2174                 break;
2175             case '.':
2176                 skip_new_token = FALSE;
2177                 if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2178                     current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2179                 }else{
2180                     current_token -> choice = GBPARSE_INT_DOT_DOT;
2181                     current_col ++ ;
2182                 }
2183                 break;
2184 
2185             case '>':
2186                 skip_new_token = FALSE;
2187                 current_token -> choice = GBPARSE_INT_GT;
2188                 break;
2189 
2190             case '<':
2191                 skip_new_token = FALSE;
2192                 current_token -> choice = GBPARSE_INT_LT;
2193 
2194                 break;
2195 
2196             case ';':
2197             case ',':
2198                 skip_new_token = FALSE;
2199                 current_token -> choice = GBPARSE_INT_COMMA;
2200                 break;
2201 
2202             case ' ': case '\t': case '\n': case '\r': case '~':
2203                 skip_new_token = TRUE;
2204                 break;
2205 
2206             case 't' :
2207             if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2208                 goto ACCESSION;
2209             }else{
2210                 skip_new_token = FALSE;
2211                 current_token -> choice = GBPARSE_INT_DOT_DOT;
2212                 current_col ++ ;
2213                 break;
2214             }
2215 
2216             case 's' :
2217             if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2218                 goto ACCESSION;
2219             }else{
2220                 skip_new_token = FALSE;
2221                 current_token -> choice = GBPARSE_INT_SITES;
2222                 current_col += 3 ;
2223                 if (*current_col != '\0')
2224                 if ( * (current_col +1) == 's')
2225                     current_col ++;
2226                 if (*current_col != '\0'){
2227                     if ( * (current_col +1) == ';'){
2228                         current_col ++;
2229                     }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2230                         current_col += 2;
2231                     }
2232                 }
2233                 break;
2234             }
2235 
2236 
2237  ACCESSION:
2238             default:
2239     /*-------
2240  * all GenBank accessions start with a capital letter
2241  * and then have numbers
2242     ------*/
2243 /* new accessions start with 2 capital letters !!  1997 */
2244 /* new accessions have .version !!  2/15/1999 */
2245                 skip_new_token = FALSE;
2246                 current_token -> choice = GBPARSE_INT_ACCESION;
2247                 dex = Nlm_gbparse_accprefix(current_col);
2248                 spare = current_col + dex;
2249                 for (; isdigit((int) *spare); spare ++){
2250                     dex ++ ;
2251                 }
2252                 if (*spare == '.') {
2253                     dex ++ ;
2254                     for (spare++; isdigit((int) *spare); spare ++){
2255                         dex ++ ;
2256                     }
2257                 }
2258                 if (*spare != ':'){
2259                     Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2260                     retval += 10;
2261                     current_col --;
2262                 }
2263                 current_token -> data.ptrvalue = MemNew(dex+1);
2264                 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2265                 current_col += dex ;
2266 
2267 
2268         }
2269     /*--move to past last "good" character---*/
2270                 current_col ++;
2271             }
2272             if ( ! * lexed && current_token){
2273                 * lexed = current_token;
2274             }
2275             if (skip_new_token && current_token) {
2276 /*---------
2277  *   last node points to a null (blank or white space token)
2278  *-----------*/
2279                 if (last_token){
2280                     last_token -> next = NULL;
2281                 }else{
2282                     * lexed = NULL;
2283                 }
2284                 ValNodeFree(current_token);
2285             }
2286     }
2287     if ( line_use)
2288         MemFree(line_use);
2289 
2290     return retval;
2291 }
2292 
2293 
2294 /*------------- Nlm_gbparselex_ver() -----------------------*/
2295 
2296 NLM_EXTERN int
Nlm_gbparselex_ver(CharPtr linein,ValNodePtr PNTR lexed,Boolean accver)2297 Nlm_gbparselex_ver(CharPtr linein, ValNodePtr PNTR lexed, Boolean accver)
2298 {
2299     CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
2300     int dex;
2301     int retval = 0, len;
2302     ValNodePtr current_token = NULL, last_token = NULL;
2303     Boolean skip_new_token=FALSE;
2304     Boolean die_now=FALSE;
2305     ValNode forerrmacro;
2306 
2307     forerrmacro.choice =GBPARSE_INT_ACCESION ;
2308 
2309     if (*linein    ){
2310         len = StringLen(linein);
2311         line_use = MemNew(len + 1);
2312         StringCpy(line_use, linein);
2313         if ( * lexed){
2314                 Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex_ver")
2315             ValNodeFree( * lexed);
2316             * lexed = NULL;
2317         }
2318         current_col = line_use ;
2319         forerrmacro.data.ptrvalue = line_use;
2320 /*---------
2321  *   Clear terminal white space
2322  *---------*/
2323         points_at_term_null = line_use + len;
2324         spare = points_at_term_null - 1;
2325         while (*spare == ' '  || *spare == '\n' || *spare == '\r' || *spare == '~') {
2326             *spare-- = '\0';
2327             points_at_term_null --;
2328         }
2329 
2330 
2331         while (current_col < points_at_term_null && ! die_now) {
2332             if ( ! skip_new_token){
2333                 last_token = current_token;
2334                 current_token = ValNodeNew(current_token);
2335                 if ( ! * lexed)
2336                     * lexed = current_token;
2337             }
2338             switch ( *current_col){
2339 
2340             case '\"':
2341                 skip_new_token = FALSE;
2342                 current_token -> choice = GBPARSE_INT_STRING;
2343                 for (spare = current_col +1; spare < points_at_term_null;
2344                         spare ++) {
2345                     if ( *spare == '\"'){
2346                         break;
2347                     }
2348                 }
2349                 if (spare >= points_at_term_null){
2350                         Nlm_lex_error_MACRO( "unterminated string")
2351                         retval ++;
2352                 }else{
2353                     len = spare-current_col + 1;
2354                     current_token -> data.ptrvalue =
2355                         MemNew(len +2);
2356                     StringNCpy(current_token -> data.ptrvalue,
2357                         current_col,len);
2358                     current_col += len;
2359                 }
2360                     break;
2361 /*------
2362  *  NUMBER
2363  *------*/
2364             case '0': case '1': case '2': case '3': case '4':
2365             case '5': case '6': case '7': case '8': case '9':
2366                 skip_new_token = FALSE;
2367                 current_token -> choice = GBPARSE_INT_NUMBER;
2368                 for (dex=0, spare = current_col; isdigit((int) *spare); spare ++){
2369                     dex ++ ;
2370                 }
2371                 current_token -> data.ptrvalue = MemNew(dex+1);
2372                 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2373                 current_col += dex -1;
2374                 break;
2375 /*------
2376  *  JOIN
2377  *------*/
2378             case 'j':
2379                 skip_new_token = FALSE;
2380                 current_token -> choice = GBPARSE_INT_JOIN;
2381                 if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
2382                     Nlm_lex_error_MACRO( "\"join\" misspelled")
2383                     retval += 10;
2384                     for(;*current_col && *current_col != '('; current_col++)
2385                         ; /* vi match )   empty body*/
2386                     current_col -- ;  /* back up 'cause ++ follows */
2387                 }else{
2388                     current_col += 3;
2389                 }
2390                 break;
2391 
2392 /*------
2393  *  ORDER and ONE-OF
2394  *------*/
2395             case 'o':
2396                 skip_new_token = FALSE;
2397                 if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
2398                     if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
2399                     Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
2400                         retval ++;
2401                         for(;*current_col && *current_col != '('; current_col++)
2402                             ; /* vi match )   empty body*/
2403                         current_col -- ;  /* back up 'cause ++ follows */
2404                     }else{
2405                         current_token -> choice = GBPARSE_INT_ONE_OF ;
2406                         current_col += 5;
2407                     }
2408                 }else{
2409                     current_token -> choice = GBPARSE_INT_ORDER;
2410                     current_col += 4;
2411                 }
2412                 break;
2413 
2414 /*------
2415  *  REPLACE
2416  *------*/
2417             case 'r' :
2418                 skip_new_token = FALSE;
2419                 current_token -> choice = GBPARSE_INT_REPLACE ;
2420                 if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
2421                     Nlm_lex_error_MACRO( "\"replace\" misspelled")
2422                     retval ++;
2423                     for(;*current_col && *current_col != '('; current_col++)
2424                         ; /* vi match )   empty body*/
2425                     current_col -- ;  /* back up 'cause ++ follows */
2426                 }else{
2427                     current_col += 6;
2428                 }
2429                 break;
2430 
2431 /*------
2432  *  GAP or GROUP or GI
2433  *------*/
2434             case 'g':
2435                 skip_new_token = FALSE;
2436                 if(StringNCmp(current_col, "gap", 3) == 0 &&
2437                    (current_col[3] == '(' ||
2438                     current_col[3] == ' ' ||
2439                     current_col[3] == '\t' ||
2440                     current_col[3] == '\0'))
2441                 {
2442                     current_token->choice = GBPARSE_INT_GAP;
2443                     current_token->data.ptrvalue = MemNew(4);
2444                     StringCpy(current_token->data.ptrvalue, "gap");
2445                     if(StringNICmp(current_col + 3, "(unk", 4) == 0)
2446                     {
2447                     current_token->choice = GBPARSE_INT_UNK_GAP;
2448                     last_token = current_token;
2449                     current_token = ValNodeNew(current_token);
2450                     current_token->choice = GBPARSE_INT_LEFT;
2451                     current_col += 4;
2452                     }
2453                     current_col += 2;
2454                     break;
2455                 }
2456                 if(StringNCmp(current_col, "gi|", 3) == 0) {
2457                     current_token->choice = GBPARSE_INT_ACCESION;
2458                     current_col += 3;
2459                     for (; IS_DIGIT(*current_col); current_col++) ;
2460                     break;
2461                 }
2462                 current_token -> choice = GBPARSE_INT_GROUP;
2463                 if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2464                     Nlm_lex_error_MACRO("\"group\" misspelled")
2465                     retval ++;
2466                     for(;*current_col && *current_col != '('; current_col++)
2467                         ; /* vi match )   empty body*/
2468                     current_col -- ;  /* back up 'cause ++ follows */
2469                 }else{
2470                     current_col += 4;
2471                 }
2472                 break;
2473 
2474 /*------
2475  *  COMPLEMENT
2476  *------*/
2477             case 'c':
2478                 skip_new_token = FALSE;
2479                 current_token -> choice = GBPARSE_INT_COMPL;
2480                 if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2481                     Nlm_lex_error_MACRO("\"complement\" misspelled")
2482                     retval += 10;
2483                     for(;*current_col && *current_col != '('; current_col++)
2484                         ; /* vi match )   empty body*/
2485                     current_col -- ;  /* back up 'cause ++ follows */
2486                 }else{
2487                     current_col += 9;
2488                 }
2489                 break;
2490 
2491 /*-------
2492  * internal bases ignored
2493  *---------*/
2494             case 'b':
2495             if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2496                 goto ACCESSION;
2497             }else{
2498                 skip_new_token = TRUE;
2499                 current_col += 4;
2500             }
2501                 break;
2502 
2503 /*------
2504  *  ()^.,<>  (bases (sites
2505  *------*/
2506             case '(':
2507                 if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2508                     skip_new_token = FALSE;
2509                     current_token -> choice = GBPARSE_INT_JOIN;
2510                     current_col += 4;
2511                     if (*current_col != '\0')
2512                     if ( * (current_col +1) == 's')
2513                         current_col ++;
2514                     last_token = current_token;
2515                     current_token = ValNodeNew(current_token);
2516                     current_token -> choice = GBPARSE_INT_LEFT;
2517                 }else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2518                     skip_new_token = FALSE;
2519                     current_col += 5;
2520                     if (*current_col != '\0')
2521                     if ( * (current_col +1) == ')'){
2522                         current_col ++;
2523                         current_token -> choice = GBPARSE_INT_SITES;
2524                     }else{
2525                         current_token -> choice = GBPARSE_INT_SITES;
2526                         last_token = current_token;
2527                         current_token = ValNodeNew(current_token);
2528                         current_token -> choice = GBPARSE_INT_JOIN;
2529                         last_token = current_token;
2530                         current_token = ValNodeNew(current_token);
2531                         current_token -> choice = GBPARSE_INT_LEFT;
2532                         if (*current_col != '\0'){
2533                             if ( * (current_col +1) == ';'){
2534                                 current_col ++;
2535                             }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2536                                 current_col += 2;
2537                             }
2538                         }
2539                     }
2540                 }else{
2541                     skip_new_token = FALSE;
2542                     current_token -> choice = GBPARSE_INT_LEFT;
2543                 }
2544                 break;
2545 
2546             case ')':
2547                 skip_new_token = FALSE;
2548                 current_token -> choice = GBPARSE_INT_RIGHT;
2549 
2550                 break;
2551 
2552             case '^':
2553                 skip_new_token = FALSE;
2554                 current_token -> choice = GBPARSE_INT_CARET;
2555                 break;
2556 
2557                         case '-':
2558                 skip_new_token = FALSE;
2559                 current_token -> choice = GBPARSE_INT_DOT_DOT ;
2560                 break;
2561             case '.':
2562                 skip_new_token = FALSE;
2563                 if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2564                     current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2565                 }else{
2566                     current_token -> choice = GBPARSE_INT_DOT_DOT;
2567                     current_col ++ ;
2568                 }
2569                 break;
2570 
2571             case '>':
2572                 skip_new_token = FALSE;
2573                 current_token -> choice = GBPARSE_INT_GT;
2574                 break;
2575 
2576             case '<':
2577                 skip_new_token = FALSE;
2578                 current_token -> choice = GBPARSE_INT_LT;
2579 
2580                 break;
2581 
2582             case ';':
2583             case ',':
2584                 skip_new_token = FALSE;
2585                 current_token -> choice = GBPARSE_INT_COMMA;
2586                 break;
2587 
2588             case ' ': case '\t': case '\n': case '\r': case '~':
2589                 skip_new_token = TRUE;
2590                 break;
2591 
2592             case 't' :
2593             if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2594                 goto ACCESSION;
2595             }else{
2596                 skip_new_token = FALSE;
2597                 current_token -> choice = GBPARSE_INT_DOT_DOT;
2598                 current_col ++ ;
2599                 break;
2600             }
2601 
2602             case 's' :
2603             if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2604                 goto ACCESSION;
2605             }else{
2606                 skip_new_token = FALSE;
2607                 current_token -> choice = GBPARSE_INT_SITES;
2608                 current_col += 3 ;
2609                 if (*current_col != '\0')
2610                 if ( * (current_col +1) == 's')
2611                     current_col ++;
2612                 if (*current_col != '\0'){
2613                     if ( * (current_col +1) == ';'){
2614                         current_col ++;
2615                     }else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2616                         current_col += 2;
2617                     }
2618                 }
2619                 break;
2620             }
2621 
2622 
2623  ACCESSION:
2624             default:
2625     /*-------
2626  * all GenBank accessions start with a capital letter
2627  * and then have numbers
2628     ------*/
2629 /* new accessions start with 2 capital letters !!  1997 */
2630 /* new accessions have .version !!  2/15/1999 */
2631                 skip_new_token = FALSE;
2632                 current_token -> choice = GBPARSE_INT_ACCESION;
2633                 dex = Nlm_gbparse_accprefix(current_col);
2634                 spare = current_col + dex;
2635                 for (; isdigit((int) *spare); spare ++){
2636                     dex ++ ;
2637                 }
2638                 if (accver != FALSE && *spare == '.') {
2639                     dex ++ ;
2640                     for (spare++; isdigit((int) *spare); spare ++){
2641                         dex ++ ;
2642                     }
2643                 }
2644                 if (*spare != ':'){
2645                     Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2646                     retval += 10;
2647                     current_col --;
2648                 }
2649                 current_token -> data.ptrvalue = MemNew(dex+1);
2650                 StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2651                 current_col += dex ;
2652 
2653 
2654         }
2655     /*--move to past last "good" character---*/
2656                 current_col ++;
2657             }
2658             if ( ! * lexed && current_token){
2659                 * lexed = current_token;
2660             }
2661             if (skip_new_token && current_token) {
2662 /*---------
2663  *   last node points to a null (blank or white space token)
2664  *-----------*/
2665                 if (last_token){
2666                     last_token -> next = NULL;
2667                 }else{
2668                     * lexed = NULL;
2669                 }
2670                 ValNodeFree(current_token);
2671             }
2672     }
2673     if ( line_use)
2674         MemFree(line_use);
2675 
2676     return retval;
2677 }
2678 
2679 
/*---- non_white()----*/
/*
 *  Step past ch[0] unconditionally, then past any white space that
 *  follows it, and return a pointer to the first character that is not
 *  white space (this may be the terminating '\0').
 *
 *  A NULL pointer or an empty string is returned unchanged, so the scan
 *  never reads beyond the terminator.
 */
NLM_EXTERN CharPtr
Nlm_non_white(CharPtr ch)
{
    if (ch == NULL || *ch == '\0') {
        return ch;
    }
    do {
        ch++;
        /* unsigned char cast: isspace() is undefined for negative values */
    } while (isspace((unsigned char) *ch));
    return ch;
}
2689 
2690 /*------ gbparse_lexfree()-------*/
2691 
2692 NLM_EXTERN ValNodePtr
Nlm_gbparse_lexfree(ValNodePtr anp)2693 Nlm_gbparse_lexfree(ValNodePtr anp)
2694 {
2695     ValNodePtr next;
2696 
2697    while (anp != NULL)
2698    {
2699       next = anp->next;
2700             if ( anp -> choice == GBPARSE_INT_NUMBER ||
2701                     anp -> choice == GBPARSE_INT_ACCESION){
2702                 MemFree(anp->data.ptrvalue);
2703             }
2704             MemFree(anp);
2705       anp = next;
2706    }
2707 
2708     return NULL;
2709 }
2710