1 /* gbparint.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  gbparint.c
27 *
28 * Author:  Alexey Dobronadezhdin (translated from gbparint.c made by Karl Sirotkin)
29 *
30 */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbimisc.hpp>
35 #include <objects/seqloc/Seq_loc.hpp>
36 #include <objmgr/util/seq_loc_util.hpp>
37 #include <objects/seqloc/Seq_loc_equiv.hpp>
38 #include <objects/general/Dbtag.hpp>
39 #include <objects/general/Object_id.hpp>
40 #include "ftacpp.hpp"
41 #include "ftaerr.hpp"
42 #include "valnode.h"
43 #include "xgbparint.h"
44 
45 #ifdef THIS_FILE
46 #    undef THIS_FILE
47 #endif
48 #define THIS_FILE "xgbparint.cpp"
49 
/* Selector values; their users are outside the visible part of this file. */
#define TAKE_FIRST 1
#define TAKE_SECOND 2

/*
 * Token kinds stored in ValNode::choice by the location lexer
 * (xgbparselex_ver).  The trailing comments list the input text that the
 * lexer maps to each kind.
 */
#define GBPARSE_INT_UNKNOWN 0      /* not echoed in error context */
#define GBPARSE_INT_JOIN 1         /* "join"; also emitted for "(base"/"(sites" forms */
#define GBPARSE_INT_COMPL 2        /* "complement" */
#define GBPARSE_INT_LEFT 3         /* "(" */
#define GBPARSE_INT_RIGHT 4        /* ")" */
#define GBPARSE_INT_CARET 5        /* "^" */
#define GBPARSE_INT_DOT_DOT 6      /* "..", "-" or "to" */
#define GBPARSE_INT_ACCESION 7     /* accession (optionally with .version) or "gi|NNN" */
#define GBPARSE_INT_GT 8           /* ">" */
#define GBPARSE_INT_LT 9           /* "<" */
#define GBPARSE_INT_COMMA 10       /* "," or ";" */
#define GBPARSE_INT_NUMBER 11      /* run of decimal digits */
#define GBPARSE_INT_ORDER 12       /* "order" */
#define GBPARSE_INT_SINGLE_DOT 13  /* "." not followed by another "." */
#define GBPARSE_INT_GROUP 14       /* "group" */
#define GBPARSE_INT_ONE_OF 15      /* "one-of" */
#define GBPARSE_INT_REPLACE 16     /* "replace" */
#define GBPARSE_INT_SITES 17       /* "site", "sites" or "(sites" */
#define GBPARSE_INT_STRING 18      /* double-quoted text, quotes included */
#define GBPARSE_INT_ONE_OF_NUM 19  /* "one-of" re-tagged by xfind_one_of_num() when ".." follows its ")" */
#define GBPARSE_INT_GAP 20         /* "gap" */
#define GBPARSE_INT_UNK_GAP 21     /* "gap(unk" */

/* NOTE(review): not referenced in the visible part of this file. */
#define ERR_NCBIGBPARSE_LEX 1
#define ERR_NCBIGBPARSE_INT 2
78 
79 BEGIN_NCBI_SCOPE
80 
/* Dbtag.db names that XGapToSeqLocEx() puts on the Seq-id of a gap location:
 * the first for gaps of known size, the second for gaps flagged as unknown. */
const Char* seqlitdbtag = "SeqLit";
const Char* unkseqlitdbtag = "UnkSeqLit";
83 
84 /*--------- do_xgbparse_error () ---------------*/
85 
86 #define ERR_FEATURE_LocationParsing_validatr 1,5
87 
do_xgbparse_error(const Char * msg,const Char * details)88 static void do_xgbparse_error (const Char* msg, const Char* details)
89 {
90     size_t len = StringLen(msg) +7;
91     char* errmsg;
92     char* temp;
93 
94     len += StringLen(details);
95     temp = errmsg = static_cast<char*>(MemNew((size_t)len));
96     temp = StringMove(temp, msg);
97     temp = StringMove(temp, " at ");
98     temp = StringMove(temp, details);
99 
100     Nlm_ErrSetContext("validatr", __FILE__, __LINE__);
101     Nlm_ErrPostEx(SEV_ERROR, ERR_FEATURE_LocationParsing_validatr, errmsg);
102 
103     MemFree(errmsg);
104 }
105 
/* Currently installed error reporter; xgbparse_error() calls through it. */
static X_gbparse_errfunc Err_func = do_xgbparse_error;
/* Optional callback returning a sequence length; NULL disables the range
 * check in xgbcheck_range(). */
static X_gbparse_rangefunc Range_func = NULL;
/* Opaque pointer handed back to Range_func as its first argument. */
static void* xgbparse_range_data = NULL;
109 
110 /*----------- xinstall_gbparse_error_handler ()-------------*/
111 
xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)112 void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
113 {
114     Err_func = new_func;
115 }
116 
117 /*----------- xinstall_gbparse_range_func ()-------------*/
118 
xinstall_gbparse_range_func(void * data,X_gbparse_rangefunc new_func)119 void xinstall_gbparse_range_func(void* data, X_gbparse_rangefunc new_func)
120 {
121     Range_func = new_func;
122     xgbparse_range_data = data;
123 }
124 
125 /*------ xgbparse_point ()----*/
126 
xgbparse_point(ValNodePtr head,ValNodePtr current)127 static char* xgbparse_point(ValNodePtr head, ValNodePtr current)
128 {
129     char* temp;
130     char* retval = 0;
131     size_t len = 0;
132     ValNodePtr now;
133 
134     for (now = head; now; now = now->next){
135         switch (now->choice){
136         case GBPARSE_INT_JOIN:
137             len += 4;
138             break;
139         case GBPARSE_INT_COMPL:
140             len += 10;
141             break;
142         case GBPARSE_INT_LEFT:
143         case GBPARSE_INT_RIGHT:
144         case GBPARSE_INT_CARET:
145         case GBPARSE_INT_GT:
146         case GBPARSE_INT_LT:
147         case GBPARSE_INT_COMMA:
148         case GBPARSE_INT_SINGLE_DOT:
149             len++;
150             break;
151         case GBPARSE_INT_DOT_DOT:
152             len += 2;
153             break;
154         case GBPARSE_INT_ACCESION:
155         case GBPARSE_INT_NUMBER:
156             len += StringLen(static_cast<char*>(now->data.ptrvalue));
157             break;
158         case GBPARSE_INT_ORDER:
159         case GBPARSE_INT_GROUP:
160             len += 5;
161             break;
162         case GBPARSE_INT_ONE_OF:
163         case GBPARSE_INT_ONE_OF_NUM:
164             len += 6;
165             break;
166         case GBPARSE_INT_REPLACE:
167             len += 7;
168             break;
169         case GBPARSE_INT_STRING:
170             len += StringLen(static_cast<char*>(now->data.ptrvalue)) + 1;
171             break;
172         case GBPARSE_INT_UNKNOWN:
173         default:
174             break;
175         }
176         len++; /* for space */
177 
178 
179         if (now == current)
180             break;
181     }
182 
183 
184     if (len > 0){
185         temp = retval = static_cast<char*>(MemNew(len + 1));
186         for (now = head; now; now = now->next){
187             switch (now->choice){
188             case GBPARSE_INT_JOIN:
189                 temp = StringMove(temp, "join");
190                 break;
191             case GBPARSE_INT_COMPL:
192                 temp = StringMove(temp, "complement");
193                 break;
194             case GBPARSE_INT_LEFT:
195                 temp = StringMove(temp, "(");
196                 break;
197             case GBPARSE_INT_RIGHT:
198                 temp = StringMove(temp, ")");
199                 break;
200             case GBPARSE_INT_CARET:
201                 temp = StringMove(temp, "^");
202                 break;
203             case GBPARSE_INT_DOT_DOT:
204                 temp = StringMove(temp, "..");
205                 break;
206             case GBPARSE_INT_ACCESION:
207             case GBPARSE_INT_NUMBER:
208             case GBPARSE_INT_STRING:
209                 temp = StringMove(temp, static_cast<char*>(now->data.ptrvalue));
210                 break;
211             case GBPARSE_INT_GT:
212                 temp = StringMove(temp, ">");
213                 break;
214             case GBPARSE_INT_LT:
215                 temp = StringMove(temp, "<");
216                 break;
217             case GBPARSE_INT_COMMA:
218                 temp = StringMove(temp, ",");
219                 break;
220             case GBPARSE_INT_ORDER:
221                 temp = StringMove(temp, "order");
222                 break;
223             case GBPARSE_INT_SINGLE_DOT:
224                 temp = StringMove(temp, ".");
225                 break;
226             case GBPARSE_INT_GROUP:
227                 temp = StringMove(temp, "group");
228                 break;
229             case GBPARSE_INT_ONE_OF:
230             case GBPARSE_INT_ONE_OF_NUM:
231                 temp = StringMove(temp, "one-of");
232                 break;
233             case GBPARSE_INT_REPLACE:
234                 temp = StringMove(temp, "replace");
235                 break;
236             case GBPARSE_INT_UNKNOWN:
237             default:
238                 break;
239             }
240             temp = StringMove(temp, " ");
241             if (now == current)
242                 break;
243         }
244     }
245 
246     return retval;
247 }
248 /*--------- xgbparse_error()-----------*/
249 
xgbparse_error(const Char * front,ValNodePtr head,ValNodePtr current)250 static void xgbparse_error(const Char* front, ValNodePtr head, ValNodePtr current)
251 {
252     char* details;
253 
254     details = xgbparse_point (head, current);
255     Err_func (front,details);
256     MemFree(details);
257 }
258 
259 /*------------------ xgbcheck_range()-------------*/
xgbcheck_range(TSeqPos num,const objects::CSeq_id & id,bool & keep_rawPt,int & num_errsPt,ValNodePtr head,ValNodePtr current)260 static void xgbcheck_range(TSeqPos num, const objects::CSeq_id& id, bool& keep_rawPt, int& num_errsPt, ValNodePtr head, ValNodePtr current)
261 {
262     TSeqPos len;
263     if (Range_func != NULL)
264     {
265         len = (*Range_func)(xgbparse_range_data, id);
266         if (len != static_cast<TSeqPos>(-1))
267         {
268             if (num >= len)
269             {
270                 xgbparse_error("range error", head, current);
271                 keep_rawPt = true;
272                 ++num_errsPt;
273             }
274         }
275     }
276 }
277 
278 
279 /*--------- xfind_one_of_num()------------*/
280 /*
281 
282 Consider these for locations:
283          misc_signal     join(57..one-of(67,75),one-of(100,110)..200)
284      misc_signal     join(57..one-of(67,75),one-of(100,110..120),200)
285      misc_signal     join(57..one-of(67,75),one-of(100,110..115)..200)
286 
287      misc_signal     join(57..one-of(67,75),one-of(100,110),200)
288 
289 In the first three, the one-of() is functioning as an alternative set
290 of numbers, in the last, as an alternative set of locations (even
291 though the locations are points).
292 [yes the one-of(100,110..115).. is illegal]
293 
294   here is one more case:one-of(18,30)..470 so if the location
295   starts with a one-of, it also needs to be checked.
296 
297 To deal with this, the GBPARSE_INT_ONE_OF token type will be changed
298 by the following function to GBPARSE_INT_ONE_OF_NUM, in the three cases.
299 
300 note that this change is not necessary in this case:
301         join(100..200,300..one-of(400,500)), as after a ".." token,
302     it has to be a number.
303 
304 */
305 
xfind_one_of_num(ValNodePtr head_token)306 static void xfind_one_of_num(ValNodePtr head_token)
307 {
308     ValNodePtr current, scanner;
309 
310     current = head_token;
311     if (current -> choice == GBPARSE_INT_ONE_OF){
312             scanner= current -> next;
313 /*-------(is first token after ")" a ".."?----*/
314             for (;scanner!=NULL; scanner = scanner -> next){
315                 if (scanner -> choice == GBPARSE_INT_RIGHT){
316                     scanner = scanner -> next;
317                     if (scanner != NULL){
318                         if (scanner -> choice == GBPARSE_INT_DOT_DOT){
319 /*---- this is it ! ! */
320                             current -> choice = GBPARSE_INT_ONE_OF_NUM;
321                         }
322                     }
323                     break;
324                 }
325             }
326     }
327     for (current = head_token; current != NULL; current = current -> next){
328         if ( current -> choice == GBPARSE_INT_COMMA ||
329             current -> choice == GBPARSE_INT_LEFT ){
330             scanner= current -> next;
331             if ( scanner != NULL){
332                 if (scanner -> choice == GBPARSE_INT_ONE_OF){
333 /*-------(is first token after ")" a ".."?----*/
334                     for (;scanner!=NULL; scanner = scanner -> next){
335                         if (scanner -> choice == GBPARSE_INT_RIGHT){
336                             scanner = scanner -> next;
337                             if (scanner != NULL){
338                                 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
339 /*---- this is it ! ! */
340                                     current -> next -> choice
341                                         = GBPARSE_INT_ONE_OF_NUM;
342                                 }
343                             }
344                             break;
345                         }
346                     }
347                 }
348             }
349         }
350     }
351 
352 }
353 
354 
355 /**********************************************************/
xgbparse_accprefix(char * acc)356 static size_t xgbparse_accprefix(char* acc)
357 {
358     char* p;
359 
360     if (acc == NULL || *acc == '\0')
361         return(0);
362 
363     for (p = acc; IS_ALPHA(*p) != 0;)
364         p++;
365     size_t ret = p - acc;
366     if (*p == '_')
367     {
368         if (ret == 2)
369         {
370             for (p++; IS_ALPHA(*p) != 0;)
371                 p++;
372             ret = p - acc;
373             if (ret != 3 && ret != 7)
374                 ret = 1;
375         }
376         else
377             ret = 1;
378     }
379     else if (p[0] != '\0' && p[0] >= '0' && p[0] <= '9' &&
380              p[1] != '\0' && p[1] >= '0' && p[1] <= '9' && p[2] == 'S')
381              ret = 7;
382     else if (ret != 1 && ret != 2 && ret != 4 && ret != 6)
383         ret = 1;
384     return(ret);
385 }
386 
/* Character temporarily overwritten by xlex_error_MACRO. */
static char Saved_ch;

/*
 * Report a lexer error with the input line shown up to and including the
 * current column.
 *
 * Expects `current_col` (char*) and `forerrmacro` (ValNode) in the calling
 * scope.  The character after the current column is replaced by NUL for
 * the duration of the report and restored afterwards.
 *
 * The expansion is a single compound statement, so it behaves as one
 * statement wherever it is used, with or without a trailing semicolon.
 */
#define xlex_error_MACRO(msg)\
    {\
        if (current_col != NULL && *current_col){\
            Saved_ch = *(current_col +1);\
            *(current_col +1) = '\0';\
        }else{\
            Saved_ch='\0';\
        }\
        xgbparse_error(msg, & forerrmacro, & forerrmacro);\
        if (Saved_ch)\
            *(current_col +1) = Saved_ch;\
    }
399 
400 
401 /*------------- xgbparselex_ver() -----------------------*/
402 
xgbparselex_ver(char * linein,ValNodePtr * lexed,bool accver)403 static int xgbparselex_ver(char* linein, ValNodePtr* lexed, bool accver)
404 {
405     char* current_col = 0, *points_at_term_null, *spare, *line_use = 0;
406     size_t dex = 0,
407            retval = 0,
408            len = 0;
409 
410     ValNodePtr current_token = NULL,
411                last_token = NULL;
412 
413     bool skip_new_token = false;
414     bool die_now = false;
415     ValNode forerrmacro;
416 
417     forerrmacro.choice = GBPARSE_INT_ACCESION;
418 
419     if (*linein)
420     {
421         len = StringLen(linein);
422         line_use = static_cast<char*>(MemNew(len + 1));
423         StringCpy(line_use, linein);
424         if (*lexed)
425         {
426             xlex_error_MACRO("Lex list not cleared on entry to Nlm_gbparselex_ver")
427                 ValNodeFree(*lexed);
428             *lexed = NULL;
429         }
430         current_col = line_use;
431         forerrmacro.data.ptrvalue = line_use;
432 
433         /*---------
434         *   Clear terminal white space
435         *---------*/
436         points_at_term_null = line_use + len;
437         spare = points_at_term_null - 1;
438         while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
439             *spare-- = '\0';
440             points_at_term_null--;
441         }
442 
443 
444         while (current_col < points_at_term_null && !die_now) {
445             if (!skip_new_token){
446                 last_token = current_token;
447                 current_token = ValNodeNew(current_token);
448                 if (!* lexed)
449                     * lexed = current_token;
450             }
451             switch (*current_col){
452 
453             case '\"':
454                 skip_new_token = false;
455                 current_token->choice = GBPARSE_INT_STRING;
456                 for (spare = current_col + 1; spare < points_at_term_null;
457                      spare++) {
458                     if (*spare == '\"'){
459                         break;
460                     }
461                 }
462                 if (spare >= points_at_term_null){
463                     xlex_error_MACRO("unterminated string")
464                         retval++;
465                 }
466                 else{
467                     len = spare - current_col + 1;
468                     current_token->data.ptrvalue =
469                         MemNew(len + 2);
470                     StringNCpy(static_cast<char*>(current_token->data.ptrvalue),
471                                current_col, len);
472                     current_col += len;
473                 }
474                 break;
475                 /*------
476                 *  NUMBER
477                 *------*/
478             case '0': case '1': case '2': case '3': case '4':
479             case '5': case '6': case '7': case '8': case '9':
480                 skip_new_token = false;
481                 current_token->choice = GBPARSE_INT_NUMBER;
482                 for (dex = 0, spare = current_col; isdigit((int)*spare); spare++){
483                     dex++;
484                 }
485                 current_token->data.ptrvalue = MemNew(dex + 1);
486                 StringNCpy(static_cast<char*>(current_token->data.ptrvalue), current_col, dex);
487                 current_col += dex - 1;
488                 break;
489                 /*------
490                 *  JOIN
491                 *------*/
492             case 'j':
493                 skip_new_token = false;
494                 current_token->choice = GBPARSE_INT_JOIN;
495                 if (StringNCmp(current_col, "join", (unsigned)4) != 0){
496                     xlex_error_MACRO("\"join\" misspelled")
497                         retval += 10;
498                     for (; *current_col && *current_col != '('; current_col++)
499                         ; /* vi match )   empty body*/
500                     current_col--;  /* back up 'cause ++ follows */
501                 }
502                 else{
503                     current_col += 3;
504                 }
505                 break;
506 
507                 /*------
508                 *  ORDER and ONE-OF
509                 *------*/
510             case 'o':
511                 skip_new_token = false;
512                 if (StringNCmp(current_col, "order", (unsigned)5) != 0){
513                     if (StringNCmp(current_col, "one-of", (unsigned)6) != 0){
514                         xlex_error_MACRO("\"order\" or \"one-of\" misspelled")
515                             retval++;
516                         for (; *current_col && *current_col != '('; current_col++)
517                             ; /* vi match )   empty body*/
518                         current_col--;  /* back up 'cause ++ follows */
519                     }
520                     else{
521                         current_token->choice = GBPARSE_INT_ONE_OF;
522                         current_col += 5;
523                     }
524                 }
525                 else{
526                     current_token->choice = GBPARSE_INT_ORDER;
527                     current_col += 4;
528                 }
529                 break;
530 
531                 /*------
532                 *  REPLACE
533                 *------*/
534             case 'r':
535                 skip_new_token = false;
536                 current_token->choice = GBPARSE_INT_REPLACE;
537                 if (StringNCmp(current_col, "replace", (unsigned)6) != 0){
538                     xlex_error_MACRO("\"replace\" misspelled")
539                         retval++;
540                     for (; *current_col && *current_col != '('; current_col++)
541                         ; /* vi match )   empty body*/
542                     current_col--;  /* back up 'cause ++ follows */
543                 }
544                 else{
545                     current_col += 6;
546                 }
547                 break;
548 
549                 /*------
550                 *  GAP or GROUP or GI
551                 *------*/
552             case 'g':
553                 skip_new_token = false;
554                 if (StringNCmp(current_col, "gap", 3) == 0 &&
555                     (current_col[3] == '(' ||
556                     current_col[3] == ' ' ||
557                     current_col[3] == '\t' ||
558                     current_col[3] == '\0'))
559                 {
560                     current_token->choice = GBPARSE_INT_GAP;
561                     current_token->data.ptrvalue = MemNew(4);
562                     StringCpy(static_cast<char*>(current_token->data.ptrvalue), "gap");
563                     if (StringNICmp(current_col + 3, "(unk", 4) == 0)
564                     {
565                         current_token->choice = GBPARSE_INT_UNK_GAP;
566                         last_token = current_token;
567                         current_token = ValNodeNew(current_token);
568                         current_token->choice = GBPARSE_INT_LEFT;
569                         current_col += 4;
570                     }
571                     current_col += 2;
572                     break;
573                 }
574                 if (StringNCmp(current_col, "gi|", 3) == 0) {
575                     current_token->choice = GBPARSE_INT_ACCESION;
576                     current_col += 3;
577                     for (; IS_DIGIT(*current_col); current_col++);
578                     break;
579                 }
580                 current_token->choice = GBPARSE_INT_GROUP;
581                 if (StringNCmp(current_col, "group", (unsigned)5) != 0){
582                     xlex_error_MACRO("\"group\" misspelled")
583                         retval++;
584                     for (; *current_col && *current_col != '('; current_col++)
585                         ; /* vi match )   empty body*/
586                     current_col--;  /* back up 'cause ++ follows */
587                 }
588                 else{
589                     current_col += 4;
590                 }
591                 break;
592 
593                 /*------
594                 *  COMPLEMENT
595                 *------*/
596             case 'c':
597                 skip_new_token = false;
598                 current_token->choice = GBPARSE_INT_COMPL;
599                 if (StringNCmp(current_col, "complement", (unsigned)10) != 0){
600                     xlex_error_MACRO("\"complement\" misspelled")
601                         retval += 10;
602                     for (; *current_col && *current_col != '('; current_col++)
603                         ; /* vi match )   empty body*/
604                     current_col--;  /* back up 'cause ++ follows */
605                 }
606                 else{
607                     current_col += 9;
608                 }
609                 break;
610 
611                 /*-------
612                 * internal bases ignored
613                 *---------*/
614             case 'b':
615                 if (StringNCmp(current_col, "bases", (unsigned)5) != 0){
616                     goto ACCESSION;
617                 }
618                 else{
619                     skip_new_token = true;
620                     current_col += 4;
621                 }
622                 break;
623 
624                 /*------
625                 *  ()^.,<>  (bases (sites
626                 *------*/
627             case '(':
628                 if (StringNCmp(current_col, "(base", (unsigned)5) == 0){
629                     skip_new_token = false;
630                     current_token->choice = GBPARSE_INT_JOIN;
631                     current_col += 4;
632                     if (*current_col != '\0')
633                         if (*(current_col + 1) == 's')
634                             current_col++;
635                     last_token = current_token;
636                     current_token = ValNodeNew(current_token);
637                     current_token->choice = GBPARSE_INT_LEFT;
638                 }
639                 else if (StringNCmp(current_col, "(sites", (unsigned)5) == 0){
640                     skip_new_token = false;
641                     current_col += 5;
642                     if (*current_col != '\0')
643                     {
644                         if (*(current_col + 1) == ')'){
645                             current_col++;
646                             current_token->choice = GBPARSE_INT_SITES;
647                         }
648                         else{
649                             current_token->choice = GBPARSE_INT_SITES;
650                             last_token = current_token;
651                             current_token = ValNodeNew(current_token);
652                             current_token->choice = GBPARSE_INT_JOIN;
653                             last_token = current_token;
654                             current_token = ValNodeNew(current_token);
655                             current_token->choice = GBPARSE_INT_LEFT;
656                             if (*current_col != '\0'){
657                                 if (*(current_col + 1) == ';'){
658                                     current_col++;
659                                 }
660                                 else if (StringNCmp(current_col + 1, " ;", (unsigned)2) == 0){
661                                     current_col += 2;
662                                 }
663                             }
664                         }
665                     }
666                 }
667                 else{
668                     skip_new_token = false;
669                     current_token->choice = GBPARSE_INT_LEFT;
670                 }
671                 break;
672 
673             case ')':
674                 skip_new_token = false;
675                 current_token->choice = GBPARSE_INT_RIGHT;
676 
677                 break;
678 
679             case '^':
680                 skip_new_token = false;
681                 current_token->choice = GBPARSE_INT_CARET;
682                 break;
683 
684             case '-':
685                 skip_new_token = false;
686                 current_token->choice = GBPARSE_INT_DOT_DOT;
687                 break;
688             case '.':
689                 skip_new_token = false;
690                 if (StringNCmp(current_col, "..", (unsigned)2) != 0){
691                     current_token->choice = GBPARSE_INT_SINGLE_DOT;
692                 }
693                 else{
694                     current_token->choice = GBPARSE_INT_DOT_DOT;
695                     current_col++;
696                 }
697                 break;
698 
699             case '>':
700                 skip_new_token = false;
701                 current_token->choice = GBPARSE_INT_GT;
702                 break;
703 
704             case '<':
705                 skip_new_token = false;
706                 current_token->choice = GBPARSE_INT_LT;
707 
708                 break;
709 
710             case ';':
711             case ',':
712                 skip_new_token = false;
713                 current_token->choice = GBPARSE_INT_COMMA;
714                 break;
715 
716             case ' ': case '\t': case '\n': case '\r': case '~':
717                 skip_new_token = true;
718                 break;
719 
720             case 't':
721                 if (StringNCmp(current_col, "to", (unsigned)2) != 0){
722                     goto ACCESSION;
723                 }
724                 else{
725                     skip_new_token = false;
726                     current_token->choice = GBPARSE_INT_DOT_DOT;
727                     current_col++;
728                     break;
729                 }
730 
731             case 's':
732                 if (StringNCmp(current_col, "site", (unsigned)4) != 0){
733                     goto ACCESSION;
734                 }
735                 else{
736                     skip_new_token = false;
737                     current_token->choice = GBPARSE_INT_SITES;
738                     current_col += 3;
739                     if (*current_col != '\0')
740                         if (*(current_col + 1) == 's')
741                             current_col++;
742                     if (*current_col != '\0'){
743                         if (*(current_col + 1) == ';'){
744                             current_col++;
745                         }
746                         else if (StringNCmp(current_col + 1, " ;", (unsigned)2) == 0){
747                             current_col += 2;
748                         }
749                     }
750                     break;
751                 }
752 
753 
754             ACCESSION:
755             default:
756                 /*-------
757                 * all GenBank accessions start with a capital letter
758                 * and then have numbers
759                 ------*/
760                 /* new accessions start with 2 capital letters !!  1997 */
761                 /* new accessions have .version !!  2/15/1999 */
762                 skip_new_token = false;
763                 current_token->choice = GBPARSE_INT_ACCESION;
764                 dex = xgbparse_accprefix(current_col);
765                 spare = current_col + dex;
766                 for (; isdigit((int)*spare); spare++){
767                     dex++;
768                 }
769                 if (accver && *spare == '.') {
770                     dex++;
771                     for (spare++; isdigit((int)*spare); spare++){
772                         dex++;
773                     }
774                 }
775                 if (*spare != ':'){
776                     xlex_error_MACRO("ACCESSION missing \":\"")
777                         retval += 10;
778                     current_col--;
779                 }
780                 current_token->data.ptrvalue = MemNew(dex + 1);
781                 StringNCpy(static_cast<char*>(current_token->data.ptrvalue), current_col, dex);
782                 current_col += dex;
783 
784 
785             }
786             /*--move to past last "good" character---*/
787             current_col++;
788         }
789         if (!* lexed && current_token){
790             *lexed = current_token;
791         }
792         if (skip_new_token && current_token) {
793             /*---------
794             *   last node points to a null (blank or white space token)
795             *-----------*/
796             if (last_token){
797                 last_token->next = NULL;
798             }
799             else{
800                 *lexed = NULL;
801             }
802             ValNodeFree(current_token);
803         }
804     }
805     if (line_use)
806         MemFree(line_use);
807 
808     return static_cast<int>(retval);
809 }
810 
811 /*----------------- xgbparse_better_be_done()-------------*/
xgbparse_better_be_done(int & num_errsPt,ValNodePtr current_token,ValNodePtr head_token,bool & keep_rawPt,int paren_count)812 static void xgbparse_better_be_done(int& num_errsPt, ValNodePtr current_token, ValNodePtr head_token, bool& keep_rawPt, int paren_count)
813 {
814     if (current_token)
815     {
816         while (current_token->choice == GBPARSE_INT_RIGHT)
817         {
818             paren_count--;
819             current_token = current_token->next;
820             if (!current_token)
821             {
822                 if (paren_count)
823                 {
824                     char par_msg[40];
825                     sprintf(par_msg, "mismatched parentheses (%d)", paren_count);
826                     xgbparse_error(par_msg,
827                                    head_token, current_token);
828                     keep_rawPt = true;
829                     ++num_errsPt;
830                 }
831                 break;
832             }
833         }
834     }
835 
836     if (paren_count)
837     {
838         xgbparse_error("text after last legal right parenthesis",
839                        head_token, current_token);
840         keep_rawPt = true;
841         ++num_errsPt;
842     }
843 
844     if (current_token)
845     {
846         xgbparse_error("text after end",
847                        head_token, current_token);
848         keep_rawPt = true;
849         ++num_errsPt;
850     }
851 }
852 
853 /**********************************************************
854 *
855 *   CRef<objects::CSeq_loc> XGapToSeqLocEx(range, unknown):
856 *
857 *      Gets the size of gap and constructs SeqLoc block with
858 *   $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
859 *
860 **********************************************************/
XGapToSeqLocEx(Int4 range,bool unknown)861 static CRef<objects::CSeq_loc> XGapToSeqLocEx(Int4 range, bool unknown)
862 {
863     CRef<objects::CSeq_loc> ret;
864 
865     if (range < 0)
866         return ret;
867 
868     ret.Reset(new objects::CSeq_loc);
869     if (range == 0)
870     {
871         ret->SetNull();
872         return ret;
873     }
874 
875     objects::CSeq_interval& interval = ret->SetInt();
876     interval.SetFrom(0);
877     interval.SetTo(range - 1);
878 
879     objects::CSeq_id& id = interval.SetId();
880     id.SetGeneral().SetDb(unknown ? unkseqlitdbtag : seqlitdbtag);
881     id.SetGeneral().SetTag().SetId(0);
882 
883     return ret;
884 }
885 
886 /**********************************************************/
xgbgap(ValNodePtr & currentPt,CRef<objects::CSeq_loc> & loc,bool unknown)887 static void xgbgap(ValNodePtr& currentPt, CRef<objects::CSeq_loc>& loc, bool unknown)
888 {
889     ValNodePtr vnp_first;
890     ValNodePtr vnp_second;
891     ValNodePtr vnp_third;
892 
893     vnp_first = currentPt->next;
894     if (vnp_first == NULL || vnp_first->choice != GBPARSE_INT_LEFT)
895         return;
896 
897     vnp_second = vnp_first->next;
898     if (vnp_second == NULL || (vnp_second->choice != GBPARSE_INT_NUMBER &&
899         vnp_second->choice != GBPARSE_INT_RIGHT))
900         return;
901 
902     if (vnp_second->choice == GBPARSE_INT_RIGHT)
903     {
904         loc->SetNull();
905     }
906     else
907     {
908         vnp_third = vnp_second->next;
909         if (vnp_third == NULL || vnp_third->choice != GBPARSE_INT_RIGHT)
910             return;
911 
912         CRef<objects::CSeq_loc> new_loc = XGapToSeqLocEx(atoi((char*)vnp_second->data.ptrvalue), unknown);
913         if (new_loc.Empty())
914             return;
915 
916         currentPt = currentPt->next;
917         loc = new_loc;
918     }
919 
920     currentPt = currentPt->next;
921     currentPt = currentPt->next;
922     currentPt = currentPt->next;
923 }
924 
925 /*------------------- xgbpintpnt()-----------*/
926 
xgbpintpnt(objects::CSeq_loc & loc)927 static void xgbpintpnt(objects::CSeq_loc& loc)
928 {
929     CRef<objects::CSeq_point> point(new objects::CSeq_point);
930 
931     point->SetPoint(loc.GetInt().GetFrom());
932 
933     if (loc.GetInt().IsSetId())
934         point->SetId(loc.SetInt().SetId());
935 
936     if (loc.GetInt().IsSetFuzz_from())
937         point->SetFuzz(loc.SetInt().SetFuzz_from());
938 
939     loc.SetPnt(*point);
940 }
941 
942 /*----- xgbload_number() -----*/
943 
xgbload_number(TSeqPos & numPt,objects::CInt_fuzz & fuzz,bool & keep_rawPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,int take_which)944 static void xgbload_number(TSeqPos& numPt, objects::CInt_fuzz& fuzz, bool& keep_rawPt, ValNodePtr& currentPt, ValNodePtr head_token, int& num_errPt, int take_which)
945 {
946     int num_found = 0;
947     int fuzz_err = 0;
948     bool strange_sin_dot = false;
949 
950     if (currentPt->choice == GBPARSE_INT_CARET)
951     {
952         xgbparse_error("duplicate carets", head_token, currentPt);
953         keep_rawPt = true;
954         ++num_errPt;
955         currentPt = currentPt->next;
956         fuzz_err = 1;
957     }
958     else if (currentPt->choice == GBPARSE_INT_GT ||
959              currentPt->choice == GBPARSE_INT_LT)
960     {
961         if (currentPt->choice == GBPARSE_INT_GT)
962             fuzz.SetLim(objects::CInt_fuzz::eLim_gt);
963         else
964             fuzz.SetLim(objects::CInt_fuzz::eLim_lt);
965 
966         currentPt = currentPt->next;
967     }
968     else if (currentPt->choice == GBPARSE_INT_LEFT)
969     {
970         strange_sin_dot = true;
971         currentPt = currentPt->next;
972         fuzz.SetRange();
973 
974         if (currentPt->choice == GBPARSE_INT_NUMBER)
975         {
976             fuzz.SetRange().SetMin(atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1);
977             if (take_which == TAKE_FIRST)
978             {
979                 numPt = fuzz.GetRange().GetMin();
980             }
981             currentPt = currentPt->next;
982             num_found = 1;
983         }
984         else
985             fuzz_err = 1;
986 
987         if (currentPt->choice != GBPARSE_INT_SINGLE_DOT)
988             fuzz_err = 1;
989         else
990         {
991             currentPt = currentPt->next;
992             if (currentPt->choice == GBPARSE_INT_NUMBER)
993             {
994                 fuzz.SetRange().SetMax(atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1);
995                 if (take_which == TAKE_SECOND)
996                 {
997                     numPt = fuzz.GetRange().GetMax();
998                 }
999                 currentPt = currentPt->next;
1000             }
1001             else
1002                 fuzz_err = 1;
1003 
1004             if (currentPt->choice == GBPARSE_INT_RIGHT)
1005                 currentPt = currentPt->next;
1006             else
1007                 fuzz_err = 1;
1008         }
1009 
1010     }
1011     else if (currentPt->choice != GBPARSE_INT_NUMBER)
1012     {
1013         /* this prevents endless cycling, unconditionally */
1014         if (currentPt->choice != GBPARSE_INT_ONE_OF
1015             && currentPt->choice != GBPARSE_INT_ONE_OF_NUM)
1016             currentPt = currentPt->next;
1017         num_found = -1;
1018     }
1019 
1020     if (!strange_sin_dot)
1021     {
1022         if (!currentPt)
1023         {
1024             xgbparse_error("unexpected end of interval tokens",
1025                            head_token, currentPt);
1026             keep_rawPt = true;
1027             ++num_errPt;
1028         }
1029         else{
1030             if (currentPt->choice == GBPARSE_INT_NUMBER)
1031             {
1032                 numPt = atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1;
1033                 currentPt = currentPt->next;
1034                 num_found = 1;
1035             }
1036         }
1037     }
1038 
1039     if (fuzz_err)
1040     {
1041         xgbparse_error("Incorrect uncertainty", head_token, currentPt);
1042         keep_rawPt = true;
1043         ++num_errPt;
1044     }
1045 
1046     if (num_found != 1)
1047     {
1048         keep_rawPt = true;
1049         /****************
1050         *
1051         *  10..one-of(13,15) type syntax here
1052         *
1053         ***************/
1054         if (currentPt->choice == GBPARSE_INT_ONE_OF
1055             || currentPt->choice == GBPARSE_INT_ONE_OF_NUM)
1056         {
1057             bool one_of_ok = true;
1058             bool at_end_one_of = false;
1059 
1060             currentPt = currentPt->next;
1061             if (currentPt->choice != GBPARSE_INT_LEFT)
1062             {
1063                 one_of_ok = false;
1064             }
1065             else
1066             {
1067                 currentPt = currentPt->next;
1068             }
1069 
1070             if (one_of_ok && currentPt->choice == GBPARSE_INT_NUMBER)
1071             {
1072                 numPt = atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1;
1073                 currentPt = currentPt->next;
1074             }
1075             else
1076             {
1077                 one_of_ok = false;
1078             }
1079 
1080             while (one_of_ok && !at_end_one_of &&  currentPt != NULL)
1081             {
1082                 switch (currentPt->choice)
1083                 {
1084                 default:
1085                     one_of_ok = false;
1086                     break;
1087                 case GBPARSE_INT_COMMA:
1088                 case GBPARSE_INT_NUMBER:
1089                     currentPt = currentPt->next;
1090                     break;
1091                 case GBPARSE_INT_RIGHT:
1092                     currentPt = currentPt->next;
1093                     at_end_one_of = true;
1094                     break;
1095                 }
1096             }
1097 
1098             if (!one_of_ok && !at_end_one_of)
1099             {
1100                 while (!at_end_one_of && currentPt != NULL)
1101                 {
1102                     if (currentPt->choice == GBPARSE_INT_RIGHT)
1103                         at_end_one_of = true;
1104                     currentPt = currentPt->next;
1105                 }
1106             }
1107 
1108             if (!one_of_ok){
1109 
1110                 xgbparse_error("bad one-of() syntax as number",
1111                                head_token, currentPt);
1112                 ++num_errPt;
1113             }
1114         }
1115         else
1116         {
1117             xgbparse_error("Number not found when expected",
1118                            head_token, currentPt);
1119             ++num_errPt;
1120         }
1121     }
1122 }
1123 
1124 /*--------------- xgbint_ver ()--------------------*/
1125 /* sometimes returns points */
xgbint_ver(bool & keep_rawPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1126 static CRef<objects::CSeq_loc> xgbint_ver(bool& keep_rawPt, ValNodePtr& currentPt,
1127                                                       ValNodePtr head_token, int& num_errPt, const TSeqIdList& seq_ids,
1128                                                       bool accver)
1129 {
1130     CRef<objects::CSeq_loc> ret(new objects::CSeq_loc);
1131 
1132     bool took_choice = false;
1133     char* p;
1134 
1135     CRef<objects::CSeq_id> new_id;
1136     CRef<objects::CInt_fuzz> new_fuzz;
1137 
1138     if (currentPt->choice == GBPARSE_INT_ACCESION)
1139     {
1140         CRef<objects::CTextseq_id> text_id(new objects::CTextseq_id);
1141 
1142         if (accver == false)
1143         {
1144             text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1145         }
1146         else
1147         {
1148             p = StringChr(static_cast<char*>(currentPt->data.ptrvalue), '.');
1149             if (p == NULL)
1150             {
1151                 text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1152                 xgbparse_error("Missing accession's version",
1153                                head_token, currentPt);
1154             }
1155             else
1156             {
1157                 *p = '\0';
1158                 text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1159                 text_id->SetVersion(atoi(p + 1));
1160                 *p = '.';
1161             }
1162         }
1163 
1164         new_id.Reset(new objects::CSeq_id);
1165         if (!seq_ids.empty())
1166         {
1167             const objects::CSeq_id& first_id = *(*seq_ids.begin());
1168             if (first_id.IsEmbl())
1169             {
1170                 new_id->SetEmbl(*text_id);
1171                 took_choice = true;
1172             }
1173             else if (first_id.IsDdbj())
1174             {
1175                 new_id->SetDdbj(*text_id);
1176                 took_choice = true;
1177             }
1178         }
1179 
1180         if (!took_choice) // Genbank
1181             new_id->SetGenbank(*text_id);
1182 
1183         currentPt = currentPt->next;
1184         if (!currentPt)
1185         {
1186             xgbparse_error("Nothing after accession",
1187                            head_token, currentPt);
1188             new_id.Reset();
1189             keep_rawPt = true;
1190             ++num_errPt;
1191             goto FATAL;
1192         }
1193     }
1194     else
1195     {
1196         if (!seq_ids.empty())
1197             new_id = *seq_ids.begin();
1198     }
1199 
1200     if (currentPt->choice == GBPARSE_INT_LT)
1201     {
1202         new_fuzz.Reset(new objects::CInt_fuzz);
1203         new_fuzz->SetLim(objects::CInt_fuzz::eLim_lt);
1204 
1205         currentPt = currentPt->next;
1206         if (!currentPt)
1207         {
1208             xgbparse_error("Nothing after \'<\'",
1209                            head_token, currentPt);
1210             keep_rawPt = true;
1211             ++num_errPt;
1212             goto FATAL;
1213         }
1214     }
1215 
1216     if (!num_errPt)
1217     {
1218         switch (currentPt->choice)
1219         {
1220         case  GBPARSE_INT_ACCESION:
1221             if (new_id.NotEmpty())
1222             {
1223                 xgbparse_error("duplicate accessions",
1224                                head_token, currentPt);
1225                 keep_rawPt = true;
1226                 ++num_errPt;
1227                 goto FATAL;
1228             }
1229             break;
1230         case  GBPARSE_INT_CARET:
1231             xgbparse_error("caret (^) before number",
1232                            head_token, currentPt);
1233             keep_rawPt = true;
1234             ++num_errPt;
1235             goto FATAL;
1236         case  GBPARSE_INT_LT:
1237             if (new_id.NotEmpty())
1238             {
1239                 xgbparse_error("duplicate \'<\'",
1240                                head_token, currentPt);
1241                 keep_rawPt = true;
1242                 ++num_errPt;
1243                 goto FATAL;
1244             }
1245             break;
1246         case  GBPARSE_INT_GT:
1247         case  GBPARSE_INT_NUMBER:
1248         case  GBPARSE_INT_LEFT:
1249 
1250         case GBPARSE_INT_ONE_OF_NUM:
1251             if (new_fuzz.NotEmpty())
1252                 ret->SetInt().SetFuzz_from(*new_fuzz);
1253             if (new_id.NotEmpty())
1254                 ret->SetInt().SetId(*new_id);
1255 
1256             xgbload_number(ret->SetInt().SetFrom(), ret->SetInt().SetFuzz_from(),
1257                            keep_rawPt, currentPt, head_token,
1258                            num_errPt, TAKE_FIRST);
1259 
1260             if (ret->GetInt().GetFuzz_from().Which() == objects::CInt_fuzz::e_not_set)
1261                 ret->SetInt().ResetFuzz_from();
1262 
1263             xgbcheck_range(ret->GetInt().GetFrom(), *new_id, keep_rawPt, num_errPt, head_token, currentPt);
1264 
1265             if (!num_errPt)
1266             {
1267                 if (currentPt)
1268                 {
1269                     bool in_caret = false;
1270                     switch (currentPt->choice)
1271                     {
1272                     default:
1273                     case GBPARSE_INT_JOIN:
1274                     case GBPARSE_INT_COMPL:
1275                     case GBPARSE_INT_SINGLE_DOT:
1276                     case GBPARSE_INT_ORDER:
1277                     case GBPARSE_INT_GROUP:
1278                     case GBPARSE_INT_ACCESION:
1279                         xgbparse_error("problem with 2nd number",
1280                                        head_token, currentPt);
1281                         keep_rawPt = true;
1282                         ++num_errPt;
1283                         goto FATAL;
1284                     case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1285                         /*--------------but have a point, not an interval----*/
1286                         xgbpintpnt(*ret);
1287                         break;
1288 
1289                     case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1290                         xgbparse_error("Missing \'..\'",
1291                                        head_token, currentPt);;
1292                         keep_rawPt = true;
1293                         ++num_errPt;
1294                         goto FATAL;
1295                     case GBPARSE_INT_CARET:
1296                         if (ret->GetInt().IsSetFuzz_from())
1297                         {
1298                             xgbparse_error("\'<\' then \'^\'",
1299                                            head_token, currentPt);
1300                             keep_rawPt = true;
1301                             ++num_errPt;
1302                             goto FATAL;
1303                         }
1304 
1305                         ret->SetInt().SetFuzz_from().SetLim(objects::CInt_fuzz::eLim_tl);
1306                         ret->SetInt().SetFuzz_to().SetLim(objects::CInt_fuzz::eLim_tl);
1307                         in_caret = true;
1308                         /*---no break on purpose ---*/
1309 
1310                     case GBPARSE_INT_DOT_DOT:
1311                         currentPt = currentPt->next;
1312                         if (currentPt == NULL)
1313                         {
1314                             xgbparse_error("unexpected end of usable tokens",
1315                                            head_token, currentPt);
1316                             keep_rawPt = true;
1317                             ++num_errPt;
1318                             goto FATAL;
1319                         }
1320                         /*--no break on purpose here ---*/
1321                     case GBPARSE_INT_NUMBER:
1322                     case GBPARSE_INT_LEFT:
1323 
1324                     case GBPARSE_INT_ONE_OF_NUM:  /* unlikely, but ok */
1325 
1326                         if (currentPt->choice == GBPARSE_INT_RIGHT)
1327                         {
1328                             if (ret->GetInt().IsSetFuzz_from())
1329                             {
1330                                 xgbparse_error("\'^\' then \'>\'",
1331                                                head_token, currentPt);
1332                                 keep_rawPt = true;
1333                                 ++num_errPt;
1334                                 goto FATAL;
1335                             }
1336                         }
1337 
1338                         xgbload_number(ret->SetInt().SetTo(), ret->SetInt().SetFuzz_to(),
1339                                        keep_rawPt, currentPt, head_token,
1340                                        num_errPt, TAKE_SECOND);
1341                         if (ret->GetInt().GetFuzz_to().Which() == objects::CInt_fuzz::e_not_set)
1342                             ret->SetInt().ResetFuzz_to();
1343 
1344                         xgbcheck_range(ret->GetInt().GetTo(), *new_id, keep_rawPt, num_errPt, head_token, currentPt);
1345 
1346                         /*----------
1347                         *  The caret location implies a place (point) between two location.
1348                         *  This is not exactly captured by the ASN.1, but pretty close
1349                         *-------*/
1350                         if (in_caret)
1351                         {
1352                             TSeqPos to = ret->GetInt().GetTo();
1353 
1354                             xgbpintpnt(*ret);
1355                             objects::CSeq_point& point = ret->SetPnt();
1356                             if (point.GetPoint() + 1 == to)
1357                             {
1358                                 point.SetPoint(to); /* was essentailly correct */
1359                             }
1360                             else
1361                             {
1362                                 point.SetFuzz().SetRange().SetMax(to);
1363                                 point.SetFuzz().SetRange().SetMin(point.GetPoint());
1364                             }
1365                         }
1366 
1367                         if (ret->IsInt())
1368                         {
1369                             if (ret->GetInt().GetFrom() == ret->GetInt().GetTo() &&
1370                                 !ret->GetInt().IsSetFuzz_from() &&
1371                                 !ret->GetInt().IsSetFuzz_to())
1372                             {
1373                                 /*-------if interval really a point, make is so ----*/
1374                                 xgbpintpnt(*ret);
1375                             }
1376                         }
1377                     } /* end switch */
1378                 }
1379                 else
1380                 {
1381                     xgbpintpnt(*ret);
1382                 }
1383             }
1384             else
1385             {
1386                 goto FATAL;
1387             }
1388             break;
1389         default:
1390             xgbparse_error("No number when expected",
1391                            head_token, currentPt);
1392             keep_rawPt = true;
1393             ++num_errPt;
1394             goto FATAL;
1395 
1396         }
1397     }
1398 
1399 
1400 RETURN:
1401     return ret;
1402 
1403 FATAL:
1404     ret.Reset();
1405     goto RETURN;
1406 }
1407 
1408 
1409 /*---------- xgbloc_ver()-----*/
1410 
xgbloc_ver(bool & keep_rawPt,int & parenPt,bool & sitesPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1411 static CRef<objects::CSeq_loc> xgbloc_ver(bool& keep_rawPt, int& parenPt,
1412                                                       bool& sitesPt, ValNodePtr& currentPt,
1413                                                       ValNodePtr head_token, int& num_errPt,
1414                                                       const TSeqIdList& seq_ids, bool accver)
1415 {
1416     CRef<objects::CSeq_loc> retval;
1417 
1418     bool add_nulls = false;
1419     ValNodePtr current_token = currentPt;
1420     bool did_complement = false;
1421     bool go_again;
1422 
1423     do
1424     {
1425         go_again = false;
1426         switch (current_token->choice)
1427         {
1428         case  GBPARSE_INT_COMPL:
1429             currentPt = currentPt->next;
1430             if (currentPt == NULL){
1431                 xgbparse_error("unexpected end of usable tokens",
1432                                head_token, currentPt);
1433                 keep_rawPt = true;
1434                 ++num_errPt;
1435                 goto FATAL;
1436             }
1437             if (currentPt->choice != GBPARSE_INT_LEFT){
1438                 xgbparse_error("Missing \'(\'", /* paran match  ) */
1439                                head_token, currentPt);
1440                 keep_rawPt = true;
1441                 ++num_errPt;
1442                 goto FATAL;
1443             }
1444             else{
1445                 ++parenPt; currentPt = currentPt->next;
1446                 if (!currentPt){
1447                     xgbparse_error("illegal null contents",
1448                                    head_token, currentPt);
1449                     keep_rawPt = true;
1450                     ++num_errPt;
1451                     goto FATAL;
1452                 }
1453                 else{
1454                     if (currentPt->choice == GBPARSE_INT_RIGHT){ /* paran match ( */
1455                         xgbparse_error("Premature \')\'",
1456                                        head_token, currentPt);
1457                         keep_rawPt = true;
1458                         ++num_errPt;
1459                         goto FATAL;
1460                     }
1461                     else{
1462                         retval = xgbloc_ver(keep_rawPt, parenPt, sitesPt, currentPt,
1463                                             head_token, num_errPt, seq_ids, accver);
1464 
1465                         if (retval.NotEmpty())
1466                             retval = objects::sequence::SeqLocRevCmpl(*retval, nullptr);
1467 
1468                         did_complement = true;
1469                         if (currentPt){
1470                             if (currentPt->choice != GBPARSE_INT_RIGHT){
1471                                 xgbparse_error("Missing \')\'",
1472                                                head_token, currentPt);
1473                                 keep_rawPt = true;
1474                                 ++num_errPt;
1475                                 goto FATAL;
1476                             }
1477                             else{
1478                                 --parenPt;
1479                                 currentPt = currentPt->next;
1480                             }
1481                         }
1482                         else{
1483                             xgbparse_error("Missing \')\'",
1484                                            head_token, currentPt);
1485                             keep_rawPt = true;
1486                             ++num_errPt;
1487                             goto FATAL;
1488                         }
1489                     }
1490                 }
1491             }
1492             break;
1493             /* REAL LOCS */
1494         case GBPARSE_INT_JOIN:
1495             retval.Reset(new objects::CSeq_loc);
1496             retval->SetMix();
1497             break;
1498         case  GBPARSE_INT_ORDER:
1499             retval.Reset(new objects::CSeq_loc);
1500             retval->SetMix();
1501             add_nulls = true;
1502             break;
1503         case  GBPARSE_INT_GROUP:
1504             retval.Reset(new objects::CSeq_loc);
1505             retval->SetMix();
1506             keep_rawPt = true;
1507             break;
1508         case  GBPARSE_INT_ONE_OF:
1509             retval.Reset(new objects::CSeq_loc);
1510             retval->SetEquiv();
1511             break;
1512 
1513             /* ERROR */
1514         case GBPARSE_INT_STRING:
1515             xgbparse_error("string in loc",
1516                            head_token, current_token);
1517             keep_rawPt = true;
1518             ++num_errPt;
1519             goto FATAL;
1520             /*--- no break on purpose---*/
1521         default:
1522         case  GBPARSE_INT_UNKNOWN:
1523         case  GBPARSE_INT_RIGHT:
1524         case  GBPARSE_INT_DOT_DOT:
1525         case  GBPARSE_INT_COMMA:
1526         case  GBPARSE_INT_SINGLE_DOT:
1527             xgbparse_error("illegal initial loc token",
1528                            head_token, currentPt);
1529             keep_rawPt = true;
1530             ++num_errPt;
1531             goto FATAL;
1532 
1533             /* Interval, occurs on recursion */
1534         case GBPARSE_INT_GAP:
1535             xgbgap(currentPt, retval, false);
1536             break;
1537         case GBPARSE_INT_UNK_GAP:
1538             xgbgap(currentPt, retval, true);
1539             break;
1540 
1541         case  GBPARSE_INT_ACCESION:
1542         case  GBPARSE_INT_CARET:
1543         case  GBPARSE_INT_GT:
1544         case  GBPARSE_INT_LT:
1545         case  GBPARSE_INT_NUMBER:
1546         case  GBPARSE_INT_LEFT:
1547 
1548         case GBPARSE_INT_ONE_OF_NUM:
1549 
1550             retval = xgbint_ver(keep_rawPt, currentPt, head_token, num_errPt, seq_ids, accver);
1551             break;
1552 
1553         case  GBPARSE_INT_REPLACE:
1554             /*-------illegal at this level --*/
1555             xgbparse_error("illegal replace",
1556                            head_token, currentPt);
1557             keep_rawPt = true;
1558             ++num_errPt;
1559             goto FATAL;
1560         case GBPARSE_INT_SITES:
1561             sitesPt = true;
1562             go_again = true;
1563             currentPt = currentPt->next;
1564             break;
1565         }
1566     } while (go_again && currentPt);
1567 
1568     if (!num_errPt)
1569     {
1570         if (retval.NotEmpty() && !retval->IsNull())
1571         {
1572             if (!retval->IsInt() && !retval->IsPnt()
1573                 && !did_complement)
1574             {
1575                 /*--------
1576                 * ONLY THE CHOICE has been set. the "join", etc. only has been noted
1577                 *----*/
1578                 currentPt = currentPt->next;
1579                 if (!currentPt)
1580                 {
1581                     xgbparse_error("unexpected end of interval tokens",
1582                                    head_token, currentPt);
1583                     keep_rawPt = true;
1584                     ++num_errPt;
1585                     goto FATAL;
1586                 }
1587                 else
1588                 {
1589                     if (currentPt->choice != GBPARSE_INT_LEFT)
1590                     {
1591                         xgbparse_error("Missing \'(\'",
1592                                        head_token, currentPt); /* paran match  ) */
1593                         keep_rawPt = true;
1594                         ++num_errPt;
1595                         goto FATAL;
1596                     }
1597                     else{
1598                         ++parenPt;
1599                         currentPt = currentPt->next;
1600                         if (!currentPt)
1601                         {
1602                             xgbparse_error("illegal null contents",
1603                                            head_token, currentPt);
1604                             keep_rawPt = true;
1605                             ++num_errPt;
1606                             goto FATAL;
1607                         }
1608                         else
1609                         {
1610                             if (currentPt->choice == GBPARSE_INT_RIGHT)
1611                             { /* paran match ( */
1612                                 xgbparse_error("Premature \')\'",
1613                                                head_token, currentPt);
1614                                 keep_rawPt = true;
1615                                 ++num_errPt;
1616                                 goto FATAL;
1617                             }
1618                             else
1619                             {
1620                                 while (!num_errPt && currentPt)
1621                                 {
1622                                     if (currentPt->choice == GBPARSE_INT_RIGHT)
1623                                     {
1624                                         while (currentPt->choice == GBPARSE_INT_RIGHT)
1625                                         {
1626                                             parenPt--;
1627                                             currentPt = currentPt->next;
1628                                             if (!currentPt)
1629                                                 break;
1630                                         }
1631                                         break;
1632                                     }
1633 
1634                                     if (!currentPt)
1635                                         break;
1636 
1637                                     CRef<objects::CSeq_loc> next_loc = xgbloc_ver(keep_rawPt, parenPt, sitesPt,
1638                                                                                               currentPt, head_token, num_errPt,
1639                                                                                               seq_ids, accver);
1640 
1641                                     if (next_loc.NotEmpty())
1642                                     {
1643                                         if (retval->IsMix())
1644                                             retval->SetMix().AddSeqLoc(*next_loc);
1645                                         else // equiv
1646                                             retval->SetEquiv().Add(*next_loc);
1647                                     }
1648 
1649                                     if (!currentPt || currentPt->choice == GBPARSE_INT_RIGHT)
1650                                         break;
1651 
1652                                     if (currentPt->choice == GBPARSE_INT_COMMA)
1653                                     {
1654                                         currentPt = currentPt->next;
1655                                         if (add_nulls)
1656                                         {
1657                                             CRef<objects::CSeq_loc> null_loc(new objects::CSeq_loc);
1658                                             null_loc->SetNull();
1659 
1660                                             if (retval->IsMix())
1661                                                 retval->SetMix().AddSeqLoc(*null_loc);
1662                                             else // equiv
1663                                                 retval->SetEquiv().Add(*null_loc);
1664                                         }
1665                                     }
1666                                     else{
1667                                         xgbparse_error("Illegal token after interval",
1668                                                        head_token, currentPt);
1669                                         keep_rawPt = true;
1670                                         ++num_errPt;
1671                                         goto FATAL;
1672                                     }
1673                                 }
1674                             }
1675                         }
1676                         if (currentPt == NULL)
1677                         {
1678                             xgbparse_error("unexpected end of usable tokens",
1679                                            head_token, currentPt);
1680                             keep_rawPt = true;
1681                             ++num_errPt;
1682                             goto FATAL;
1683                         }
1684                         else
1685                         {
1686                             if (currentPt->choice != GBPARSE_INT_RIGHT)
1687                             {
1688                                 xgbparse_error("Missing \')\'" /* paran match  ) */,
1689                                                head_token, currentPt);
1690                                 keep_rawPt = true;
1691                                 ++num_errPt;
1692                                 goto FATAL;
1693                             }
1694                             else
1695                             {
1696                                 parenPt--;
1697                                 currentPt = currentPt->next;
1698                             }
1699                         }
1700                     }
1701                 }
1702             }
1703         }
1704     }
1705 
1706 FATAL:
1707     if (num_errPt)
1708     {
1709         if (retval.NotEmpty())
1710         {
1711             retval->Reset();
1712             retval->SetWhole().Assign(*(*seq_ids.begin()));
1713         }
1714     }
1715 
1716     return retval;
1717 }
1718 
1719 /*-------- xgbreplace_ver() --------*/
1720 
xgbreplace_ver(bool & keep_rawPt,int & parenPt,bool & sitesPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1721 static CRef<objects::CSeq_loc> xgbreplace_ver(bool& keep_rawPt, int& parenPt,
1722                                                           bool& sitesPt, ValNodePtr& currentPt,
1723                                                           ValNodePtr head_token, int& num_errPt,
1724                                                           const TSeqIdList& seq_ids, bool accver)
1725 {
1726     CRef<objects::CSeq_loc> ret;
1727 
1728     keep_rawPt = true;
1729     currentPt = currentPt->next;
1730 
1731     if (currentPt->choice == GBPARSE_INT_LEFT)
1732     {
1733         currentPt = currentPt->next;
1734         ret = xgbloc_ver(keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1735                          num_errPt, seq_ids, accver);
1736         if (!currentPt)
1737         {
1738             xgbparse_error("unexpected end of interval tokens",
1739                            head_token, currentPt);
1740             keep_rawPt = true;
1741             ++num_errPt;
1742         }
1743         else
1744         {
1745 
1746             if (currentPt->choice != GBPARSE_INT_COMMA)
1747             {
1748                 xgbparse_error("Missing comma after first location in replace",
1749                                head_token, currentPt);
1750                 ++num_errPt;
1751             }
1752         }
1753     }
1754     else
1755     {
1756         xgbparse_error("Missing \'(\'" /* paran match  ) */
1757                        , head_token, currentPt);
1758         ++num_errPt;
1759     }
1760 
1761     return ret;
1762 }
1763 
1764 /*---------- xgbparseint_ver()-----*/
1765 
xgbparseint_ver(char * raw_intervals,bool & keep_rawPt,bool & sitesPt,int & num_errsPt,const TSeqIdList & seq_ids,bool accver)1766 CRef<objects::CSeq_loc> xgbparseint_ver(char* raw_intervals, bool& keep_rawPt, bool& sitesPt, int& num_errsPt,
1767                                                     const TSeqIdList& seq_ids, bool accver)
1768 {
1769     CRef<objects::CSeq_loc> ret;
1770 
1771     int paren_count = 0;
1772     bool go_again = false;
1773 
1774     keep_rawPt = false;
1775     sitesPt = false;
1776 
1777     ValNodePtr head_token = NULL,
1778                current_token = NULL;
1779 
1780     num_errsPt = xgbparselex_ver(raw_intervals, &head_token, accver);
1781 
1782     if (head_token == NULL)
1783     {
1784         num_errsPt = 1;
1785         return ret;
1786     }
1787 
1788     if ( !num_errsPt)
1789     {
1790         current_token = head_token;
1791         xfind_one_of_num(head_token);
1792 
1793         do
1794         {
1795             go_again = false;
1796             if (current_token)
1797             {
1798                 switch (current_token->choice)
1799                 {
1800                 case GBPARSE_INT_JOIN:
1801                 case GBPARSE_INT_ORDER:
1802                 case GBPARSE_INT_GROUP:
1803                 case GBPARSE_INT_ONE_OF:
1804                 case GBPARSE_INT_COMPL:
1805                     ret = xgbloc_ver(keep_rawPt, paren_count, sitesPt, current_token,
1806                                      head_token, num_errsPt, seq_ids, accver);
1807                     /* need to check that out of tokens here */
1808                     xgbparse_better_be_done(num_errsPt, current_token, head_token, keep_rawPt, paren_count);
1809                     break;
1810 
1811                 case GBPARSE_INT_STRING:
1812                     xgbparse_error("string in loc", head_token, current_token);
1813                     keep_rawPt = true;
1814                     ++num_errsPt;
1815                     /*  no break on purpose */
1816                 case  GBPARSE_INT_UNKNOWN:
1817                 default:
1818                 case  GBPARSE_INT_RIGHT:
1819                 case  GBPARSE_INT_DOT_DOT:
1820                 case  GBPARSE_INT_COMMA:
1821                 case  GBPARSE_INT_SINGLE_DOT:
1822 
1823                     xgbparse_error("illegal initial token", head_token, current_token);
1824                     keep_rawPt = true;
1825                     ++num_errsPt;
1826                     current_token = current_token->next;
1827                     break;
1828 
1829                 case  GBPARSE_INT_ACCESION:
1830                     /*--- no warn, but strange ---*/
1831                     /*-- no break on purpose ---*/
1832 
1833                 case  GBPARSE_INT_CARET: case  GBPARSE_INT_GT:
1834                 case  GBPARSE_INT_LT: case  GBPARSE_INT_NUMBER:
1835                 case  GBPARSE_INT_LEFT:
1836 
1837                 case GBPARSE_INT_ONE_OF_NUM:
1838 
1839                     ret = xgbint_ver(keep_rawPt, current_token, head_token, num_errsPt, seq_ids, accver);
1840 
1841                     /* need to check that out of tokens here */
1842                     xgbparse_better_be_done(num_errsPt, current_token, head_token, keep_rawPt, paren_count);
1843                     break;
1844 
1845                 case  GBPARSE_INT_REPLACE:
1846                     ret = xgbreplace_ver(keep_rawPt, paren_count, sitesPt, current_token,
1847                                          head_token, num_errsPt, seq_ids, accver);
1848                     keep_rawPt = true;
1849                     /*---all errors handled within this function ---*/
1850                     break;
1851                 case GBPARSE_INT_SITES:
1852                     sitesPt = true;
1853                     go_again = true;
1854                     current_token = current_token->next;
1855                     break;
1856                 }
1857             }
1858         } while (go_again && current_token);
1859     }
1860     else
1861     {
1862         keep_rawPt = true;
1863     }
1864 
1865     if ( head_token)
1866         ValNodeFreeData(head_token);
1867 
1868     if (num_errsPt)
1869         ret.Reset();
1870 
1871     return ret;
1872 }
1873 
1874 END_NCBI_SCOPE
1875