1 /* gbparint.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: gbparint.c
27 *
28 * Author: Alexey Dobronadezhdin (translated from gbparint.c made by Karl Sirotkin)
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34 #include <corelib/ncbimisc.hpp>
35 #include <objects/seqloc/Seq_loc.hpp>
36 #include <objmgr/util/seq_loc_util.hpp>
37 #include <objects/seqloc/Seq_loc_equiv.hpp>
38 #include <objects/general/Dbtag.hpp>
39 #include <objects/general/Object_id.hpp>
40 #include "ftacpp.hpp"
41 #include "ftaerr.hpp"
42 #include "valnode.h"
43 #include "xgbparint.h"
44
#ifdef THIS_FILE
#  undef THIS_FILE
#endif
#define THIS_FILE "xgbparint.cpp"

/*
 * Selector passed to xgbload_number() as take_which: for a "(min.max)"
 * position, TAKE_FIRST makes the loaded number the range minimum and
 * TAKE_SECOND makes it the range maximum.
 */
#define TAKE_FIRST  1
#define TAKE_SECOND 2

/* Token kinds stored in ValNode.choice by the location lexer. */
#define GBPARSE_INT_UNKNOWN     0
#define GBPARSE_INT_JOIN        1   /* "join" (also emitted for "(base"/"(bases") */
#define GBPARSE_INT_COMPL       2   /* "complement" */
#define GBPARSE_INT_LEFT        3   /* "(" */
#define GBPARSE_INT_RIGHT       4   /* ")" */
#define GBPARSE_INT_CARET       5   /* "^" */
#define GBPARSE_INT_DOT_DOT     6   /* "..", "-" or "to" */
#define GBPARSE_INT_ACCESION    7   /* accession text (or a "gi|" prefix) */
#define GBPARSE_INT_GT          8   /* ">" */
#define GBPARSE_INT_LT          9   /* "<" */
#define GBPARSE_INT_COMMA       10  /* "," or ";" */
#define GBPARSE_INT_NUMBER      11  /* run of decimal digits */
#define GBPARSE_INT_ORDER       12  /* "order" */
#define GBPARSE_INT_SINGLE_DOT  13  /* "." */
#define GBPARSE_INT_GROUP       14  /* "group" */
#define GBPARSE_INT_ONE_OF      15  /* "one-of" used as a set of locations */
#define GBPARSE_INT_REPLACE     16  /* "replace" */
#define GBPARSE_INT_SITES       17  /* "site" / "sites" */
#define GBPARSE_INT_STRING      18  /* double-quoted string */
#define GBPARSE_INT_ONE_OF_NUM  19  /* "one-of" used as a set of numbers (set by xfind_one_of_num) */
#define GBPARSE_INT_GAP         20  /* "gap" */
#define GBPARSE_INT_UNK_GAP     21  /* "gap(unk..." */

/* Error classes; not referenced in the visible part of this file. */
#define ERR_NCBIGBPARSE_LEX 1
#define ERR_NCBIGBPARSE_INT 2
78
79 BEGIN_NCBI_SCOPE
80
81 const Char* seqlitdbtag = "SeqLit";
82 const Char* unkseqlitdbtag = "UnkSeqLit";
83
84 /*--------- do_xgbparse_error () ---------------*/
85
86 #define ERR_FEATURE_LocationParsing_validatr 1,5
87
do_xgbparse_error(const Char * msg,const Char * details)88 static void do_xgbparse_error (const Char* msg, const Char* details)
89 {
90 size_t len = StringLen(msg) +7;
91 char* errmsg;
92 char* temp;
93
94 len += StringLen(details);
95 temp = errmsg = static_cast<char*>(MemNew((size_t)len));
96 temp = StringMove(temp, msg);
97 temp = StringMove(temp, " at ");
98 temp = StringMove(temp, details);
99
100 Nlm_ErrSetContext("validatr", __FILE__, __LINE__);
101 Nlm_ErrPostEx(SEV_ERROR, ERR_FEATURE_LocationParsing_validatr, errmsg);
102
103 MemFree(errmsg);
104 }
105
106 static X_gbparse_errfunc Err_func = do_xgbparse_error;
107 static X_gbparse_rangefunc Range_func = NULL;
108 static void* xgbparse_range_data = NULL;
109
110 /*----------- xinstall_gbparse_error_handler ()-------------*/
111
xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)112 void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
113 {
114 Err_func = new_func;
115 }
116
117 /*----------- xinstall_gbparse_range_func ()-------------*/
118
xinstall_gbparse_range_func(void * data,X_gbparse_rangefunc new_func)119 void xinstall_gbparse_range_func(void* data, X_gbparse_rangefunc new_func)
120 {
121 Range_func = new_func;
122 xgbparse_range_data = data;
123 }
124
125 /*------ xgbparse_point ()----*/
126
xgbparse_point(ValNodePtr head,ValNodePtr current)127 static char* xgbparse_point(ValNodePtr head, ValNodePtr current)
128 {
129 char* temp;
130 char* retval = 0;
131 size_t len = 0;
132 ValNodePtr now;
133
134 for (now = head; now; now = now->next){
135 switch (now->choice){
136 case GBPARSE_INT_JOIN:
137 len += 4;
138 break;
139 case GBPARSE_INT_COMPL:
140 len += 10;
141 break;
142 case GBPARSE_INT_LEFT:
143 case GBPARSE_INT_RIGHT:
144 case GBPARSE_INT_CARET:
145 case GBPARSE_INT_GT:
146 case GBPARSE_INT_LT:
147 case GBPARSE_INT_COMMA:
148 case GBPARSE_INT_SINGLE_DOT:
149 len++;
150 break;
151 case GBPARSE_INT_DOT_DOT:
152 len += 2;
153 break;
154 case GBPARSE_INT_ACCESION:
155 case GBPARSE_INT_NUMBER:
156 len += StringLen(static_cast<char*>(now->data.ptrvalue));
157 break;
158 case GBPARSE_INT_ORDER:
159 case GBPARSE_INT_GROUP:
160 len += 5;
161 break;
162 case GBPARSE_INT_ONE_OF:
163 case GBPARSE_INT_ONE_OF_NUM:
164 len += 6;
165 break;
166 case GBPARSE_INT_REPLACE:
167 len += 7;
168 break;
169 case GBPARSE_INT_STRING:
170 len += StringLen(static_cast<char*>(now->data.ptrvalue)) + 1;
171 break;
172 case GBPARSE_INT_UNKNOWN:
173 default:
174 break;
175 }
176 len++; /* for space */
177
178
179 if (now == current)
180 break;
181 }
182
183
184 if (len > 0){
185 temp = retval = static_cast<char*>(MemNew(len + 1));
186 for (now = head; now; now = now->next){
187 switch (now->choice){
188 case GBPARSE_INT_JOIN:
189 temp = StringMove(temp, "join");
190 break;
191 case GBPARSE_INT_COMPL:
192 temp = StringMove(temp, "complement");
193 break;
194 case GBPARSE_INT_LEFT:
195 temp = StringMove(temp, "(");
196 break;
197 case GBPARSE_INT_RIGHT:
198 temp = StringMove(temp, ")");
199 break;
200 case GBPARSE_INT_CARET:
201 temp = StringMove(temp, "^");
202 break;
203 case GBPARSE_INT_DOT_DOT:
204 temp = StringMove(temp, "..");
205 break;
206 case GBPARSE_INT_ACCESION:
207 case GBPARSE_INT_NUMBER:
208 case GBPARSE_INT_STRING:
209 temp = StringMove(temp, static_cast<char*>(now->data.ptrvalue));
210 break;
211 case GBPARSE_INT_GT:
212 temp = StringMove(temp, ">");
213 break;
214 case GBPARSE_INT_LT:
215 temp = StringMove(temp, "<");
216 break;
217 case GBPARSE_INT_COMMA:
218 temp = StringMove(temp, ",");
219 break;
220 case GBPARSE_INT_ORDER:
221 temp = StringMove(temp, "order");
222 break;
223 case GBPARSE_INT_SINGLE_DOT:
224 temp = StringMove(temp, ".");
225 break;
226 case GBPARSE_INT_GROUP:
227 temp = StringMove(temp, "group");
228 break;
229 case GBPARSE_INT_ONE_OF:
230 case GBPARSE_INT_ONE_OF_NUM:
231 temp = StringMove(temp, "one-of");
232 break;
233 case GBPARSE_INT_REPLACE:
234 temp = StringMove(temp, "replace");
235 break;
236 case GBPARSE_INT_UNKNOWN:
237 default:
238 break;
239 }
240 temp = StringMove(temp, " ");
241 if (now == current)
242 break;
243 }
244 }
245
246 return retval;
247 }
248 /*--------- xgbparse_error()-----------*/
249
xgbparse_error(const Char * front,ValNodePtr head,ValNodePtr current)250 static void xgbparse_error(const Char* front, ValNodePtr head, ValNodePtr current)
251 {
252 char* details;
253
254 details = xgbparse_point (head, current);
255 Err_func (front,details);
256 MemFree(details);
257 }
258
259 /*------------------ xgbcheck_range()-------------*/
xgbcheck_range(TSeqPos num,const objects::CSeq_id & id,bool & keep_rawPt,int & num_errsPt,ValNodePtr head,ValNodePtr current)260 static void xgbcheck_range(TSeqPos num, const objects::CSeq_id& id, bool& keep_rawPt, int& num_errsPt, ValNodePtr head, ValNodePtr current)
261 {
262 TSeqPos len;
263 if (Range_func != NULL)
264 {
265 len = (*Range_func)(xgbparse_range_data, id);
266 if (len != static_cast<TSeqPos>(-1))
267 {
268 if (num >= len)
269 {
270 xgbparse_error("range error", head, current);
271 keep_rawPt = true;
272 ++num_errsPt;
273 }
274 }
275 }
276 }
277
278
279 /*--------- xfind_one_of_num()------------*/
280 /*
281
282 Consider these for locations:
283 misc_signal join(57..one-of(67,75),one-of(100,110)..200)
284 misc_signal join(57..one-of(67,75),one-of(100,110..120),200)
285 misc_signal join(57..one-of(67,75),one-of(100,110..115)..200)
286
287 misc_signal join(57..one-of(67,75),one-of(100,110),200)
288
289 In the first three, the one-of() is functioning as an alternative set
290 of numbers, in the last, as an alternative set of locations (even
291 though the locations are points).
292 [yes the one-of(100,110..115).. is illegal]
293
294 here is one more case:one-of(18,30)..470 so if the location
295 starts with a one-of, it also needs to be checked.
296
297 To deal with this, the GBPARSE_INT_ONE_OF token type will be changed
298 by the following function to GBPARSE_INT_ONE_OF_NUM, in the three cases.
299
300 note that this change is not necessary in this case:
301 join(100..200,300..one-of(400,500)), as after a ".." token,
302 it has to be a number.
303
304 */
305
xfind_one_of_num(ValNodePtr head_token)306 static void xfind_one_of_num(ValNodePtr head_token)
307 {
308 ValNodePtr current, scanner;
309
310 current = head_token;
311 if (current -> choice == GBPARSE_INT_ONE_OF){
312 scanner= current -> next;
313 /*-------(is first token after ")" a ".."?----*/
314 for (;scanner!=NULL; scanner = scanner -> next){
315 if (scanner -> choice == GBPARSE_INT_RIGHT){
316 scanner = scanner -> next;
317 if (scanner != NULL){
318 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
319 /*---- this is it ! ! */
320 current -> choice = GBPARSE_INT_ONE_OF_NUM;
321 }
322 }
323 break;
324 }
325 }
326 }
327 for (current = head_token; current != NULL; current = current -> next){
328 if ( current -> choice == GBPARSE_INT_COMMA ||
329 current -> choice == GBPARSE_INT_LEFT ){
330 scanner= current -> next;
331 if ( scanner != NULL){
332 if (scanner -> choice == GBPARSE_INT_ONE_OF){
333 /*-------(is first token after ")" a ".."?----*/
334 for (;scanner!=NULL; scanner = scanner -> next){
335 if (scanner -> choice == GBPARSE_INT_RIGHT){
336 scanner = scanner -> next;
337 if (scanner != NULL){
338 if (scanner -> choice == GBPARSE_INT_DOT_DOT){
339 /*---- this is it ! ! */
340 current -> next -> choice
341 = GBPARSE_INT_ONE_OF_NUM;
342 }
343 }
344 break;
345 }
346 }
347 }
348 }
349 }
350 }
351
352 }
353
354
355 /**********************************************************/
xgbparse_accprefix(char * acc)356 static size_t xgbparse_accprefix(char* acc)
357 {
358 char* p;
359
360 if (acc == NULL || *acc == '\0')
361 return(0);
362
363 for (p = acc; IS_ALPHA(*p) != 0;)
364 p++;
365 size_t ret = p - acc;
366 if (*p == '_')
367 {
368 if (ret == 2)
369 {
370 for (p++; IS_ALPHA(*p) != 0;)
371 p++;
372 ret = p - acc;
373 if (ret != 3 && ret != 7)
374 ret = 1;
375 }
376 else
377 ret = 1;
378 }
379 else if (p[0] != '\0' && p[0] >= '0' && p[0] <= '9' &&
380 p[1] != '\0' && p[1] >= '0' && p[1] <= '9' && p[2] == 'S')
381 ret = 7;
382 else if (ret != 1 && ret != 2 && ret != 4 && ret != 6)
383 ret = 1;
384 return(ret);
385 }
386
/* Character temporarily overwritten by xlex_error_MACRO. */
static char Saved_ch;

/*
 * Reports a lexer error through xgbparse_error().
 *
 * Expects two names in the expanding scope: current_col (char*, the lexer
 * position) and forerrmacro (a ValNode used as a one-element token list).
 * When current_col points at a character, the line is cut right after that
 * character for the duration of the report and restored afterwards.
 *
 * The expansion is a plain sequence of statements (not do/while(0)), so it
 * is written without a trailing semicolon and must sit inside braces.
 */
#define xlex_error_MACRO(msg)\
    if (current_col != NULL && *current_col){\
        Saved_ch = current_col[1];\
        current_col[1] = '\0';\
    }else{\
        Saved_ch = '\0';\
    }\
    xgbparse_error(msg, &forerrmacro, &forerrmacro);\
    if (Saved_ch)\
        current_col[1] = Saved_ch;
399
400
401 /*------------- xgbparselex_ver() -----------------------*/
402
xgbparselex_ver(char * linein,ValNodePtr * lexed,bool accver)403 static int xgbparselex_ver(char* linein, ValNodePtr* lexed, bool accver)
404 {
405 char* current_col = 0, *points_at_term_null, *spare, *line_use = 0;
406 size_t dex = 0,
407 retval = 0,
408 len = 0;
409
410 ValNodePtr current_token = NULL,
411 last_token = NULL;
412
413 bool skip_new_token = false;
414 bool die_now = false;
415 ValNode forerrmacro;
416
417 forerrmacro.choice = GBPARSE_INT_ACCESION;
418
419 if (*linein)
420 {
421 len = StringLen(linein);
422 line_use = static_cast<char*>(MemNew(len + 1));
423 StringCpy(line_use, linein);
424 if (*lexed)
425 {
426 xlex_error_MACRO("Lex list not cleared on entry to Nlm_gbparselex_ver")
427 ValNodeFree(*lexed);
428 *lexed = NULL;
429 }
430 current_col = line_use;
431 forerrmacro.data.ptrvalue = line_use;
432
433 /*---------
434 * Clear terminal white space
435 *---------*/
436 points_at_term_null = line_use + len;
437 spare = points_at_term_null - 1;
438 while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
439 *spare-- = '\0';
440 points_at_term_null--;
441 }
442
443
444 while (current_col < points_at_term_null && !die_now) {
445 if (!skip_new_token){
446 last_token = current_token;
447 current_token = ValNodeNew(current_token);
448 if (!* lexed)
449 * lexed = current_token;
450 }
451 switch (*current_col){
452
453 case '\"':
454 skip_new_token = false;
455 current_token->choice = GBPARSE_INT_STRING;
456 for (spare = current_col + 1; spare < points_at_term_null;
457 spare++) {
458 if (*spare == '\"'){
459 break;
460 }
461 }
462 if (spare >= points_at_term_null){
463 xlex_error_MACRO("unterminated string")
464 retval++;
465 }
466 else{
467 len = spare - current_col + 1;
468 current_token->data.ptrvalue =
469 MemNew(len + 2);
470 StringNCpy(static_cast<char*>(current_token->data.ptrvalue),
471 current_col, len);
472 current_col += len;
473 }
474 break;
475 /*------
476 * NUMBER
477 *------*/
478 case '0': case '1': case '2': case '3': case '4':
479 case '5': case '6': case '7': case '8': case '9':
480 skip_new_token = false;
481 current_token->choice = GBPARSE_INT_NUMBER;
482 for (dex = 0, spare = current_col; isdigit((int)*spare); spare++){
483 dex++;
484 }
485 current_token->data.ptrvalue = MemNew(dex + 1);
486 StringNCpy(static_cast<char*>(current_token->data.ptrvalue), current_col, dex);
487 current_col += dex - 1;
488 break;
489 /*------
490 * JOIN
491 *------*/
492 case 'j':
493 skip_new_token = false;
494 current_token->choice = GBPARSE_INT_JOIN;
495 if (StringNCmp(current_col, "join", (unsigned)4) != 0){
496 xlex_error_MACRO("\"join\" misspelled")
497 retval += 10;
498 for (; *current_col && *current_col != '('; current_col++)
499 ; /* vi match ) empty body*/
500 current_col--; /* back up 'cause ++ follows */
501 }
502 else{
503 current_col += 3;
504 }
505 break;
506
507 /*------
508 * ORDER and ONE-OF
509 *------*/
510 case 'o':
511 skip_new_token = false;
512 if (StringNCmp(current_col, "order", (unsigned)5) != 0){
513 if (StringNCmp(current_col, "one-of", (unsigned)6) != 0){
514 xlex_error_MACRO("\"order\" or \"one-of\" misspelled")
515 retval++;
516 for (; *current_col && *current_col != '('; current_col++)
517 ; /* vi match ) empty body*/
518 current_col--; /* back up 'cause ++ follows */
519 }
520 else{
521 current_token->choice = GBPARSE_INT_ONE_OF;
522 current_col += 5;
523 }
524 }
525 else{
526 current_token->choice = GBPARSE_INT_ORDER;
527 current_col += 4;
528 }
529 break;
530
531 /*------
532 * REPLACE
533 *------*/
534 case 'r':
535 skip_new_token = false;
536 current_token->choice = GBPARSE_INT_REPLACE;
537 if (StringNCmp(current_col, "replace", (unsigned)6) != 0){
538 xlex_error_MACRO("\"replace\" misspelled")
539 retval++;
540 for (; *current_col && *current_col != '('; current_col++)
541 ; /* vi match ) empty body*/
542 current_col--; /* back up 'cause ++ follows */
543 }
544 else{
545 current_col += 6;
546 }
547 break;
548
549 /*------
550 * GAP or GROUP or GI
551 *------*/
552 case 'g':
553 skip_new_token = false;
554 if (StringNCmp(current_col, "gap", 3) == 0 &&
555 (current_col[3] == '(' ||
556 current_col[3] == ' ' ||
557 current_col[3] == '\t' ||
558 current_col[3] == '\0'))
559 {
560 current_token->choice = GBPARSE_INT_GAP;
561 current_token->data.ptrvalue = MemNew(4);
562 StringCpy(static_cast<char*>(current_token->data.ptrvalue), "gap");
563 if (StringNICmp(current_col + 3, "(unk", 4) == 0)
564 {
565 current_token->choice = GBPARSE_INT_UNK_GAP;
566 last_token = current_token;
567 current_token = ValNodeNew(current_token);
568 current_token->choice = GBPARSE_INT_LEFT;
569 current_col += 4;
570 }
571 current_col += 2;
572 break;
573 }
574 if (StringNCmp(current_col, "gi|", 3) == 0) {
575 current_token->choice = GBPARSE_INT_ACCESION;
576 current_col += 3;
577 for (; IS_DIGIT(*current_col); current_col++);
578 break;
579 }
580 current_token->choice = GBPARSE_INT_GROUP;
581 if (StringNCmp(current_col, "group", (unsigned)5) != 0){
582 xlex_error_MACRO("\"group\" misspelled")
583 retval++;
584 for (; *current_col && *current_col != '('; current_col++)
585 ; /* vi match ) empty body*/
586 current_col--; /* back up 'cause ++ follows */
587 }
588 else{
589 current_col += 4;
590 }
591 break;
592
593 /*------
594 * COMPLEMENT
595 *------*/
596 case 'c':
597 skip_new_token = false;
598 current_token->choice = GBPARSE_INT_COMPL;
599 if (StringNCmp(current_col, "complement", (unsigned)10) != 0){
600 xlex_error_MACRO("\"complement\" misspelled")
601 retval += 10;
602 for (; *current_col && *current_col != '('; current_col++)
603 ; /* vi match ) empty body*/
604 current_col--; /* back up 'cause ++ follows */
605 }
606 else{
607 current_col += 9;
608 }
609 break;
610
611 /*-------
612 * internal bases ignored
613 *---------*/
614 case 'b':
615 if (StringNCmp(current_col, "bases", (unsigned)5) != 0){
616 goto ACCESSION;
617 }
618 else{
619 skip_new_token = true;
620 current_col += 4;
621 }
622 break;
623
624 /*------
625 * ()^.,<> (bases (sites
626 *------*/
627 case '(':
628 if (StringNCmp(current_col, "(base", (unsigned)5) == 0){
629 skip_new_token = false;
630 current_token->choice = GBPARSE_INT_JOIN;
631 current_col += 4;
632 if (*current_col != '\0')
633 if (*(current_col + 1) == 's')
634 current_col++;
635 last_token = current_token;
636 current_token = ValNodeNew(current_token);
637 current_token->choice = GBPARSE_INT_LEFT;
638 }
639 else if (StringNCmp(current_col, "(sites", (unsigned)5) == 0){
640 skip_new_token = false;
641 current_col += 5;
642 if (*current_col != '\0')
643 {
644 if (*(current_col + 1) == ')'){
645 current_col++;
646 current_token->choice = GBPARSE_INT_SITES;
647 }
648 else{
649 current_token->choice = GBPARSE_INT_SITES;
650 last_token = current_token;
651 current_token = ValNodeNew(current_token);
652 current_token->choice = GBPARSE_INT_JOIN;
653 last_token = current_token;
654 current_token = ValNodeNew(current_token);
655 current_token->choice = GBPARSE_INT_LEFT;
656 if (*current_col != '\0'){
657 if (*(current_col + 1) == ';'){
658 current_col++;
659 }
660 else if (StringNCmp(current_col + 1, " ;", (unsigned)2) == 0){
661 current_col += 2;
662 }
663 }
664 }
665 }
666 }
667 else{
668 skip_new_token = false;
669 current_token->choice = GBPARSE_INT_LEFT;
670 }
671 break;
672
673 case ')':
674 skip_new_token = false;
675 current_token->choice = GBPARSE_INT_RIGHT;
676
677 break;
678
679 case '^':
680 skip_new_token = false;
681 current_token->choice = GBPARSE_INT_CARET;
682 break;
683
684 case '-':
685 skip_new_token = false;
686 current_token->choice = GBPARSE_INT_DOT_DOT;
687 break;
688 case '.':
689 skip_new_token = false;
690 if (StringNCmp(current_col, "..", (unsigned)2) != 0){
691 current_token->choice = GBPARSE_INT_SINGLE_DOT;
692 }
693 else{
694 current_token->choice = GBPARSE_INT_DOT_DOT;
695 current_col++;
696 }
697 break;
698
699 case '>':
700 skip_new_token = false;
701 current_token->choice = GBPARSE_INT_GT;
702 break;
703
704 case '<':
705 skip_new_token = false;
706 current_token->choice = GBPARSE_INT_LT;
707
708 break;
709
710 case ';':
711 case ',':
712 skip_new_token = false;
713 current_token->choice = GBPARSE_INT_COMMA;
714 break;
715
716 case ' ': case '\t': case '\n': case '\r': case '~':
717 skip_new_token = true;
718 break;
719
720 case 't':
721 if (StringNCmp(current_col, "to", (unsigned)2) != 0){
722 goto ACCESSION;
723 }
724 else{
725 skip_new_token = false;
726 current_token->choice = GBPARSE_INT_DOT_DOT;
727 current_col++;
728 break;
729 }
730
731 case 's':
732 if (StringNCmp(current_col, "site", (unsigned)4) != 0){
733 goto ACCESSION;
734 }
735 else{
736 skip_new_token = false;
737 current_token->choice = GBPARSE_INT_SITES;
738 current_col += 3;
739 if (*current_col != '\0')
740 if (*(current_col + 1) == 's')
741 current_col++;
742 if (*current_col != '\0'){
743 if (*(current_col + 1) == ';'){
744 current_col++;
745 }
746 else if (StringNCmp(current_col + 1, " ;", (unsigned)2) == 0){
747 current_col += 2;
748 }
749 }
750 break;
751 }
752
753
754 ACCESSION:
755 default:
756 /*-------
757 * all GenBank accessions start with a capital letter
758 * and then have numbers
759 ------*/
760 /* new accessions start with 2 capital letters !! 1997 */
761 /* new accessions have .version !! 2/15/1999 */
762 skip_new_token = false;
763 current_token->choice = GBPARSE_INT_ACCESION;
764 dex = xgbparse_accprefix(current_col);
765 spare = current_col + dex;
766 for (; isdigit((int)*spare); spare++){
767 dex++;
768 }
769 if (accver && *spare == '.') {
770 dex++;
771 for (spare++; isdigit((int)*spare); spare++){
772 dex++;
773 }
774 }
775 if (*spare != ':'){
776 xlex_error_MACRO("ACCESSION missing \":\"")
777 retval += 10;
778 current_col--;
779 }
780 current_token->data.ptrvalue = MemNew(dex + 1);
781 StringNCpy(static_cast<char*>(current_token->data.ptrvalue), current_col, dex);
782 current_col += dex;
783
784
785 }
786 /*--move to past last "good" character---*/
787 current_col++;
788 }
789 if (!* lexed && current_token){
790 *lexed = current_token;
791 }
792 if (skip_new_token && current_token) {
793 /*---------
794 * last node points to a null (blank or white space token)
795 *-----------*/
796 if (last_token){
797 last_token->next = NULL;
798 }
799 else{
800 *lexed = NULL;
801 }
802 ValNodeFree(current_token);
803 }
804 }
805 if (line_use)
806 MemFree(line_use);
807
808 return static_cast<int>(retval);
809 }
810
811 /*----------------- xgbparse_better_be_done()-------------*/
xgbparse_better_be_done(int & num_errsPt,ValNodePtr current_token,ValNodePtr head_token,bool & keep_rawPt,int paren_count)812 static void xgbparse_better_be_done(int& num_errsPt, ValNodePtr current_token, ValNodePtr head_token, bool& keep_rawPt, int paren_count)
813 {
814 if (current_token)
815 {
816 while (current_token->choice == GBPARSE_INT_RIGHT)
817 {
818 paren_count--;
819 current_token = current_token->next;
820 if (!current_token)
821 {
822 if (paren_count)
823 {
824 char par_msg[40];
825 sprintf(par_msg, "mismatched parentheses (%d)", paren_count);
826 xgbparse_error(par_msg,
827 head_token, current_token);
828 keep_rawPt = true;
829 ++num_errsPt;
830 }
831 break;
832 }
833 }
834 }
835
836 if (paren_count)
837 {
838 xgbparse_error("text after last legal right parenthesis",
839 head_token, current_token);
840 keep_rawPt = true;
841 ++num_errsPt;
842 }
843
844 if (current_token)
845 {
846 xgbparse_error("text after end",
847 head_token, current_token);
848 keep_rawPt = true;
849 ++num_errsPt;
850 }
851 }
852
853 /**********************************************************
854 *
855 * CRef<objects::CSeq_loc> XGapToSeqLocEx(range, unknown):
856 *
857 * Gets the size of gap and constructs SeqLoc block with
858 * $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
859 *
860 **********************************************************/
XGapToSeqLocEx(Int4 range,bool unknown)861 static CRef<objects::CSeq_loc> XGapToSeqLocEx(Int4 range, bool unknown)
862 {
863 CRef<objects::CSeq_loc> ret;
864
865 if (range < 0)
866 return ret;
867
868 ret.Reset(new objects::CSeq_loc);
869 if (range == 0)
870 {
871 ret->SetNull();
872 return ret;
873 }
874
875 objects::CSeq_interval& interval = ret->SetInt();
876 interval.SetFrom(0);
877 interval.SetTo(range - 1);
878
879 objects::CSeq_id& id = interval.SetId();
880 id.SetGeneral().SetDb(unknown ? unkseqlitdbtag : seqlitdbtag);
881 id.SetGeneral().SetTag().SetId(0);
882
883 return ret;
884 }
885
886 /**********************************************************/
xgbgap(ValNodePtr & currentPt,CRef<objects::CSeq_loc> & loc,bool unknown)887 static void xgbgap(ValNodePtr& currentPt, CRef<objects::CSeq_loc>& loc, bool unknown)
888 {
889 ValNodePtr vnp_first;
890 ValNodePtr vnp_second;
891 ValNodePtr vnp_third;
892
893 vnp_first = currentPt->next;
894 if (vnp_first == NULL || vnp_first->choice != GBPARSE_INT_LEFT)
895 return;
896
897 vnp_second = vnp_first->next;
898 if (vnp_second == NULL || (vnp_second->choice != GBPARSE_INT_NUMBER &&
899 vnp_second->choice != GBPARSE_INT_RIGHT))
900 return;
901
902 if (vnp_second->choice == GBPARSE_INT_RIGHT)
903 {
904 loc->SetNull();
905 }
906 else
907 {
908 vnp_third = vnp_second->next;
909 if (vnp_third == NULL || vnp_third->choice != GBPARSE_INT_RIGHT)
910 return;
911
912 CRef<objects::CSeq_loc> new_loc = XGapToSeqLocEx(atoi((char*)vnp_second->data.ptrvalue), unknown);
913 if (new_loc.Empty())
914 return;
915
916 currentPt = currentPt->next;
917 loc = new_loc;
918 }
919
920 currentPt = currentPt->next;
921 currentPt = currentPt->next;
922 currentPt = currentPt->next;
923 }
924
925 /*------------------- xgbpintpnt()-----------*/
926
xgbpintpnt(objects::CSeq_loc & loc)927 static void xgbpintpnt(objects::CSeq_loc& loc)
928 {
929 CRef<objects::CSeq_point> point(new objects::CSeq_point);
930
931 point->SetPoint(loc.GetInt().GetFrom());
932
933 if (loc.GetInt().IsSetId())
934 point->SetId(loc.SetInt().SetId());
935
936 if (loc.GetInt().IsSetFuzz_from())
937 point->SetFuzz(loc.SetInt().SetFuzz_from());
938
939 loc.SetPnt(*point);
940 }
941
942 /*----- xgbload_number() -----*/
943
xgbload_number(TSeqPos & numPt,objects::CInt_fuzz & fuzz,bool & keep_rawPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,int take_which)944 static void xgbload_number(TSeqPos& numPt, objects::CInt_fuzz& fuzz, bool& keep_rawPt, ValNodePtr& currentPt, ValNodePtr head_token, int& num_errPt, int take_which)
945 {
946 int num_found = 0;
947 int fuzz_err = 0;
948 bool strange_sin_dot = false;
949
950 if (currentPt->choice == GBPARSE_INT_CARET)
951 {
952 xgbparse_error("duplicate carets", head_token, currentPt);
953 keep_rawPt = true;
954 ++num_errPt;
955 currentPt = currentPt->next;
956 fuzz_err = 1;
957 }
958 else if (currentPt->choice == GBPARSE_INT_GT ||
959 currentPt->choice == GBPARSE_INT_LT)
960 {
961 if (currentPt->choice == GBPARSE_INT_GT)
962 fuzz.SetLim(objects::CInt_fuzz::eLim_gt);
963 else
964 fuzz.SetLim(objects::CInt_fuzz::eLim_lt);
965
966 currentPt = currentPt->next;
967 }
968 else if (currentPt->choice == GBPARSE_INT_LEFT)
969 {
970 strange_sin_dot = true;
971 currentPt = currentPt->next;
972 fuzz.SetRange();
973
974 if (currentPt->choice == GBPARSE_INT_NUMBER)
975 {
976 fuzz.SetRange().SetMin(atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1);
977 if (take_which == TAKE_FIRST)
978 {
979 numPt = fuzz.GetRange().GetMin();
980 }
981 currentPt = currentPt->next;
982 num_found = 1;
983 }
984 else
985 fuzz_err = 1;
986
987 if (currentPt->choice != GBPARSE_INT_SINGLE_DOT)
988 fuzz_err = 1;
989 else
990 {
991 currentPt = currentPt->next;
992 if (currentPt->choice == GBPARSE_INT_NUMBER)
993 {
994 fuzz.SetRange().SetMax(atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1);
995 if (take_which == TAKE_SECOND)
996 {
997 numPt = fuzz.GetRange().GetMax();
998 }
999 currentPt = currentPt->next;
1000 }
1001 else
1002 fuzz_err = 1;
1003
1004 if (currentPt->choice == GBPARSE_INT_RIGHT)
1005 currentPt = currentPt->next;
1006 else
1007 fuzz_err = 1;
1008 }
1009
1010 }
1011 else if (currentPt->choice != GBPARSE_INT_NUMBER)
1012 {
1013 /* this prevents endless cycling, unconditionally */
1014 if (currentPt->choice != GBPARSE_INT_ONE_OF
1015 && currentPt->choice != GBPARSE_INT_ONE_OF_NUM)
1016 currentPt = currentPt->next;
1017 num_found = -1;
1018 }
1019
1020 if (!strange_sin_dot)
1021 {
1022 if (!currentPt)
1023 {
1024 xgbparse_error("unexpected end of interval tokens",
1025 head_token, currentPt);
1026 keep_rawPt = true;
1027 ++num_errPt;
1028 }
1029 else{
1030 if (currentPt->choice == GBPARSE_INT_NUMBER)
1031 {
1032 numPt = atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1;
1033 currentPt = currentPt->next;
1034 num_found = 1;
1035 }
1036 }
1037 }
1038
1039 if (fuzz_err)
1040 {
1041 xgbparse_error("Incorrect uncertainty", head_token, currentPt);
1042 keep_rawPt = true;
1043 ++num_errPt;
1044 }
1045
1046 if (num_found != 1)
1047 {
1048 keep_rawPt = true;
1049 /****************
1050 *
1051 * 10..one-of(13,15) type syntax here
1052 *
1053 ***************/
1054 if (currentPt->choice == GBPARSE_INT_ONE_OF
1055 || currentPt->choice == GBPARSE_INT_ONE_OF_NUM)
1056 {
1057 bool one_of_ok = true;
1058 bool at_end_one_of = false;
1059
1060 currentPt = currentPt->next;
1061 if (currentPt->choice != GBPARSE_INT_LEFT)
1062 {
1063 one_of_ok = false;
1064 }
1065 else
1066 {
1067 currentPt = currentPt->next;
1068 }
1069
1070 if (one_of_ok && currentPt->choice == GBPARSE_INT_NUMBER)
1071 {
1072 numPt = atoi(static_cast<char*>(currentPt->data.ptrvalue)) - 1;
1073 currentPt = currentPt->next;
1074 }
1075 else
1076 {
1077 one_of_ok = false;
1078 }
1079
1080 while (one_of_ok && !at_end_one_of && currentPt != NULL)
1081 {
1082 switch (currentPt->choice)
1083 {
1084 default:
1085 one_of_ok = false;
1086 break;
1087 case GBPARSE_INT_COMMA:
1088 case GBPARSE_INT_NUMBER:
1089 currentPt = currentPt->next;
1090 break;
1091 case GBPARSE_INT_RIGHT:
1092 currentPt = currentPt->next;
1093 at_end_one_of = true;
1094 break;
1095 }
1096 }
1097
1098 if (!one_of_ok && !at_end_one_of)
1099 {
1100 while (!at_end_one_of && currentPt != NULL)
1101 {
1102 if (currentPt->choice == GBPARSE_INT_RIGHT)
1103 at_end_one_of = true;
1104 currentPt = currentPt->next;
1105 }
1106 }
1107
1108 if (!one_of_ok){
1109
1110 xgbparse_error("bad one-of() syntax as number",
1111 head_token, currentPt);
1112 ++num_errPt;
1113 }
1114 }
1115 else
1116 {
1117 xgbparse_error("Number not found when expected",
1118 head_token, currentPt);
1119 ++num_errPt;
1120 }
1121 }
1122 }
1123
1124 /*--------------- xgbint_ver ()--------------------*/
1125 /* sometimes returns points */
xgbint_ver(bool & keep_rawPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1126 static CRef<objects::CSeq_loc> xgbint_ver(bool& keep_rawPt, ValNodePtr& currentPt,
1127 ValNodePtr head_token, int& num_errPt, const TSeqIdList& seq_ids,
1128 bool accver)
1129 {
1130 CRef<objects::CSeq_loc> ret(new objects::CSeq_loc);
1131
1132 bool took_choice = false;
1133 char* p;
1134
1135 CRef<objects::CSeq_id> new_id;
1136 CRef<objects::CInt_fuzz> new_fuzz;
1137
1138 if (currentPt->choice == GBPARSE_INT_ACCESION)
1139 {
1140 CRef<objects::CTextseq_id> text_id(new objects::CTextseq_id);
1141
1142 if (accver == false)
1143 {
1144 text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1145 }
1146 else
1147 {
1148 p = StringChr(static_cast<char*>(currentPt->data.ptrvalue), '.');
1149 if (p == NULL)
1150 {
1151 text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1152 xgbparse_error("Missing accession's version",
1153 head_token, currentPt);
1154 }
1155 else
1156 {
1157 *p = '\0';
1158 text_id->SetAccession(static_cast<char*>(currentPt->data.ptrvalue));
1159 text_id->SetVersion(atoi(p + 1));
1160 *p = '.';
1161 }
1162 }
1163
1164 new_id.Reset(new objects::CSeq_id);
1165 if (!seq_ids.empty())
1166 {
1167 const objects::CSeq_id& first_id = *(*seq_ids.begin());
1168 if (first_id.IsEmbl())
1169 {
1170 new_id->SetEmbl(*text_id);
1171 took_choice = true;
1172 }
1173 else if (first_id.IsDdbj())
1174 {
1175 new_id->SetDdbj(*text_id);
1176 took_choice = true;
1177 }
1178 }
1179
1180 if (!took_choice) // Genbank
1181 new_id->SetGenbank(*text_id);
1182
1183 currentPt = currentPt->next;
1184 if (!currentPt)
1185 {
1186 xgbparse_error("Nothing after accession",
1187 head_token, currentPt);
1188 new_id.Reset();
1189 keep_rawPt = true;
1190 ++num_errPt;
1191 goto FATAL;
1192 }
1193 }
1194 else
1195 {
1196 if (!seq_ids.empty())
1197 new_id = *seq_ids.begin();
1198 }
1199
1200 if (currentPt->choice == GBPARSE_INT_LT)
1201 {
1202 new_fuzz.Reset(new objects::CInt_fuzz);
1203 new_fuzz->SetLim(objects::CInt_fuzz::eLim_lt);
1204
1205 currentPt = currentPt->next;
1206 if (!currentPt)
1207 {
1208 xgbparse_error("Nothing after \'<\'",
1209 head_token, currentPt);
1210 keep_rawPt = true;
1211 ++num_errPt;
1212 goto FATAL;
1213 }
1214 }
1215
1216 if (!num_errPt)
1217 {
1218 switch (currentPt->choice)
1219 {
1220 case GBPARSE_INT_ACCESION:
1221 if (new_id.NotEmpty())
1222 {
1223 xgbparse_error("duplicate accessions",
1224 head_token, currentPt);
1225 keep_rawPt = true;
1226 ++num_errPt;
1227 goto FATAL;
1228 }
1229 break;
1230 case GBPARSE_INT_CARET:
1231 xgbparse_error("caret (^) before number",
1232 head_token, currentPt);
1233 keep_rawPt = true;
1234 ++num_errPt;
1235 goto FATAL;
1236 case GBPARSE_INT_LT:
1237 if (new_id.NotEmpty())
1238 {
1239 xgbparse_error("duplicate \'<\'",
1240 head_token, currentPt);
1241 keep_rawPt = true;
1242 ++num_errPt;
1243 goto FATAL;
1244 }
1245 break;
1246 case GBPARSE_INT_GT:
1247 case GBPARSE_INT_NUMBER:
1248 case GBPARSE_INT_LEFT:
1249
1250 case GBPARSE_INT_ONE_OF_NUM:
1251 if (new_fuzz.NotEmpty())
1252 ret->SetInt().SetFuzz_from(*new_fuzz);
1253 if (new_id.NotEmpty())
1254 ret->SetInt().SetId(*new_id);
1255
1256 xgbload_number(ret->SetInt().SetFrom(), ret->SetInt().SetFuzz_from(),
1257 keep_rawPt, currentPt, head_token,
1258 num_errPt, TAKE_FIRST);
1259
1260 if (ret->GetInt().GetFuzz_from().Which() == objects::CInt_fuzz::e_not_set)
1261 ret->SetInt().ResetFuzz_from();
1262
1263 xgbcheck_range(ret->GetInt().GetFrom(), *new_id, keep_rawPt, num_errPt, head_token, currentPt);
1264
1265 if (!num_errPt)
1266 {
1267 if (currentPt)
1268 {
1269 bool in_caret = false;
1270 switch (currentPt->choice)
1271 {
1272 default:
1273 case GBPARSE_INT_JOIN:
1274 case GBPARSE_INT_COMPL:
1275 case GBPARSE_INT_SINGLE_DOT:
1276 case GBPARSE_INT_ORDER:
1277 case GBPARSE_INT_GROUP:
1278 case GBPARSE_INT_ACCESION:
1279 xgbparse_error("problem with 2nd number",
1280 head_token, currentPt);
1281 keep_rawPt = true;
1282 ++num_errPt;
1283 goto FATAL;
1284 case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1285 /*--------------but have a point, not an interval----*/
1286 xgbpintpnt(*ret);
1287 break;
1288
1289 case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1290 xgbparse_error("Missing \'..\'",
1291 head_token, currentPt);;
1292 keep_rawPt = true;
1293 ++num_errPt;
1294 goto FATAL;
1295 case GBPARSE_INT_CARET:
1296 if (ret->GetInt().IsSetFuzz_from())
1297 {
1298 xgbparse_error("\'<\' then \'^\'",
1299 head_token, currentPt);
1300 keep_rawPt = true;
1301 ++num_errPt;
1302 goto FATAL;
1303 }
1304
1305 ret->SetInt().SetFuzz_from().SetLim(objects::CInt_fuzz::eLim_tl);
1306 ret->SetInt().SetFuzz_to().SetLim(objects::CInt_fuzz::eLim_tl);
1307 in_caret = true;
1308 /*---no break on purpose ---*/
1309
1310 case GBPARSE_INT_DOT_DOT:
1311 currentPt = currentPt->next;
1312 if (currentPt == NULL)
1313 {
1314 xgbparse_error("unexpected end of usable tokens",
1315 head_token, currentPt);
1316 keep_rawPt = true;
1317 ++num_errPt;
1318 goto FATAL;
1319 }
1320 /*--no break on purpose here ---*/
1321 case GBPARSE_INT_NUMBER:
1322 case GBPARSE_INT_LEFT:
1323
1324 case GBPARSE_INT_ONE_OF_NUM: /* unlikely, but ok */
1325
1326 if (currentPt->choice == GBPARSE_INT_RIGHT)
1327 {
1328 if (ret->GetInt().IsSetFuzz_from())
1329 {
1330 xgbparse_error("\'^\' then \'>\'",
1331 head_token, currentPt);
1332 keep_rawPt = true;
1333 ++num_errPt;
1334 goto FATAL;
1335 }
1336 }
1337
1338 xgbload_number(ret->SetInt().SetTo(), ret->SetInt().SetFuzz_to(),
1339 keep_rawPt, currentPt, head_token,
1340 num_errPt, TAKE_SECOND);
1341 if (ret->GetInt().GetFuzz_to().Which() == objects::CInt_fuzz::e_not_set)
1342 ret->SetInt().ResetFuzz_to();
1343
1344 xgbcheck_range(ret->GetInt().GetTo(), *new_id, keep_rawPt, num_errPt, head_token, currentPt);
1345
1346 /*----------
1347 * The caret location implies a place (point) between two location.
1348 * This is not exactly captured by the ASN.1, but pretty close
1349 *-------*/
1350 if (in_caret)
1351 {
1352 TSeqPos to = ret->GetInt().GetTo();
1353
1354 xgbpintpnt(*ret);
1355 objects::CSeq_point& point = ret->SetPnt();
1356 if (point.GetPoint() + 1 == to)
1357 {
1358 point.SetPoint(to); /* was essentailly correct */
1359 }
1360 else
1361 {
1362 point.SetFuzz().SetRange().SetMax(to);
1363 point.SetFuzz().SetRange().SetMin(point.GetPoint());
1364 }
1365 }
1366
1367 if (ret->IsInt())
1368 {
1369 if (ret->GetInt().GetFrom() == ret->GetInt().GetTo() &&
1370 !ret->GetInt().IsSetFuzz_from() &&
1371 !ret->GetInt().IsSetFuzz_to())
1372 {
1373 /*-------if interval really a point, make is so ----*/
1374 xgbpintpnt(*ret);
1375 }
1376 }
1377 } /* end switch */
1378 }
1379 else
1380 {
1381 xgbpintpnt(*ret);
1382 }
1383 }
1384 else
1385 {
1386 goto FATAL;
1387 }
1388 break;
1389 default:
1390 xgbparse_error("No number when expected",
1391 head_token, currentPt);
1392 keep_rawPt = true;
1393 ++num_errPt;
1394 goto FATAL;
1395
1396 }
1397 }
1398
1399
1400 RETURN:
1401 return ret;
1402
1403 FATAL:
1404 ret.Reset();
1405 goto RETURN;
1406 }
1407
1408
1409 /*---------- xgbloc_ver()-----*/
1410
xgbloc_ver(bool & keep_rawPt,int & parenPt,bool & sitesPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1411 static CRef<objects::CSeq_loc> xgbloc_ver(bool& keep_rawPt, int& parenPt,
1412 bool& sitesPt, ValNodePtr& currentPt,
1413 ValNodePtr head_token, int& num_errPt,
1414 const TSeqIdList& seq_ids, bool accver)
1415 {
1416 CRef<objects::CSeq_loc> retval;
1417
1418 bool add_nulls = false;
1419 ValNodePtr current_token = currentPt;
1420 bool did_complement = false;
1421 bool go_again;
1422
1423 do
1424 {
1425 go_again = false;
1426 switch (current_token->choice)
1427 {
1428 case GBPARSE_INT_COMPL:
1429 currentPt = currentPt->next;
1430 if (currentPt == NULL){
1431 xgbparse_error("unexpected end of usable tokens",
1432 head_token, currentPt);
1433 keep_rawPt = true;
1434 ++num_errPt;
1435 goto FATAL;
1436 }
1437 if (currentPt->choice != GBPARSE_INT_LEFT){
1438 xgbparse_error("Missing \'(\'", /* paran match ) */
1439 head_token, currentPt);
1440 keep_rawPt = true;
1441 ++num_errPt;
1442 goto FATAL;
1443 }
1444 else{
1445 ++parenPt; currentPt = currentPt->next;
1446 if (!currentPt){
1447 xgbparse_error("illegal null contents",
1448 head_token, currentPt);
1449 keep_rawPt = true;
1450 ++num_errPt;
1451 goto FATAL;
1452 }
1453 else{
1454 if (currentPt->choice == GBPARSE_INT_RIGHT){ /* paran match ( */
1455 xgbparse_error("Premature \')\'",
1456 head_token, currentPt);
1457 keep_rawPt = true;
1458 ++num_errPt;
1459 goto FATAL;
1460 }
1461 else{
1462 retval = xgbloc_ver(keep_rawPt, parenPt, sitesPt, currentPt,
1463 head_token, num_errPt, seq_ids, accver);
1464
1465 if (retval.NotEmpty())
1466 retval = objects::sequence::SeqLocRevCmpl(*retval, nullptr);
1467
1468 did_complement = true;
1469 if (currentPt){
1470 if (currentPt->choice != GBPARSE_INT_RIGHT){
1471 xgbparse_error("Missing \')\'",
1472 head_token, currentPt);
1473 keep_rawPt = true;
1474 ++num_errPt;
1475 goto FATAL;
1476 }
1477 else{
1478 --parenPt;
1479 currentPt = currentPt->next;
1480 }
1481 }
1482 else{
1483 xgbparse_error("Missing \')\'",
1484 head_token, currentPt);
1485 keep_rawPt = true;
1486 ++num_errPt;
1487 goto FATAL;
1488 }
1489 }
1490 }
1491 }
1492 break;
1493 /* REAL LOCS */
1494 case GBPARSE_INT_JOIN:
1495 retval.Reset(new objects::CSeq_loc);
1496 retval->SetMix();
1497 break;
1498 case GBPARSE_INT_ORDER:
1499 retval.Reset(new objects::CSeq_loc);
1500 retval->SetMix();
1501 add_nulls = true;
1502 break;
1503 case GBPARSE_INT_GROUP:
1504 retval.Reset(new objects::CSeq_loc);
1505 retval->SetMix();
1506 keep_rawPt = true;
1507 break;
1508 case GBPARSE_INT_ONE_OF:
1509 retval.Reset(new objects::CSeq_loc);
1510 retval->SetEquiv();
1511 break;
1512
1513 /* ERROR */
1514 case GBPARSE_INT_STRING:
1515 xgbparse_error("string in loc",
1516 head_token, current_token);
1517 keep_rawPt = true;
1518 ++num_errPt;
1519 goto FATAL;
1520 /*--- no break on purpose---*/
1521 default:
1522 case GBPARSE_INT_UNKNOWN:
1523 case GBPARSE_INT_RIGHT:
1524 case GBPARSE_INT_DOT_DOT:
1525 case GBPARSE_INT_COMMA:
1526 case GBPARSE_INT_SINGLE_DOT:
1527 xgbparse_error("illegal initial loc token",
1528 head_token, currentPt);
1529 keep_rawPt = true;
1530 ++num_errPt;
1531 goto FATAL;
1532
1533 /* Interval, occurs on recursion */
1534 case GBPARSE_INT_GAP:
1535 xgbgap(currentPt, retval, false);
1536 break;
1537 case GBPARSE_INT_UNK_GAP:
1538 xgbgap(currentPt, retval, true);
1539 break;
1540
1541 case GBPARSE_INT_ACCESION:
1542 case GBPARSE_INT_CARET:
1543 case GBPARSE_INT_GT:
1544 case GBPARSE_INT_LT:
1545 case GBPARSE_INT_NUMBER:
1546 case GBPARSE_INT_LEFT:
1547
1548 case GBPARSE_INT_ONE_OF_NUM:
1549
1550 retval = xgbint_ver(keep_rawPt, currentPt, head_token, num_errPt, seq_ids, accver);
1551 break;
1552
1553 case GBPARSE_INT_REPLACE:
1554 /*-------illegal at this level --*/
1555 xgbparse_error("illegal replace",
1556 head_token, currentPt);
1557 keep_rawPt = true;
1558 ++num_errPt;
1559 goto FATAL;
1560 case GBPARSE_INT_SITES:
1561 sitesPt = true;
1562 go_again = true;
1563 currentPt = currentPt->next;
1564 break;
1565 }
1566 } while (go_again && currentPt);
1567
1568 if (!num_errPt)
1569 {
1570 if (retval.NotEmpty() && !retval->IsNull())
1571 {
1572 if (!retval->IsInt() && !retval->IsPnt()
1573 && !did_complement)
1574 {
1575 /*--------
1576 * ONLY THE CHOICE has been set. the "join", etc. only has been noted
1577 *----*/
1578 currentPt = currentPt->next;
1579 if (!currentPt)
1580 {
1581 xgbparse_error("unexpected end of interval tokens",
1582 head_token, currentPt);
1583 keep_rawPt = true;
1584 ++num_errPt;
1585 goto FATAL;
1586 }
1587 else
1588 {
1589 if (currentPt->choice != GBPARSE_INT_LEFT)
1590 {
1591 xgbparse_error("Missing \'(\'",
1592 head_token, currentPt); /* paran match ) */
1593 keep_rawPt = true;
1594 ++num_errPt;
1595 goto FATAL;
1596 }
1597 else{
1598 ++parenPt;
1599 currentPt = currentPt->next;
1600 if (!currentPt)
1601 {
1602 xgbparse_error("illegal null contents",
1603 head_token, currentPt);
1604 keep_rawPt = true;
1605 ++num_errPt;
1606 goto FATAL;
1607 }
1608 else
1609 {
1610 if (currentPt->choice == GBPARSE_INT_RIGHT)
1611 { /* paran match ( */
1612 xgbparse_error("Premature \')\'",
1613 head_token, currentPt);
1614 keep_rawPt = true;
1615 ++num_errPt;
1616 goto FATAL;
1617 }
1618 else
1619 {
1620 while (!num_errPt && currentPt)
1621 {
1622 if (currentPt->choice == GBPARSE_INT_RIGHT)
1623 {
1624 while (currentPt->choice == GBPARSE_INT_RIGHT)
1625 {
1626 parenPt--;
1627 currentPt = currentPt->next;
1628 if (!currentPt)
1629 break;
1630 }
1631 break;
1632 }
1633
1634 if (!currentPt)
1635 break;
1636
1637 CRef<objects::CSeq_loc> next_loc = xgbloc_ver(keep_rawPt, parenPt, sitesPt,
1638 currentPt, head_token, num_errPt,
1639 seq_ids, accver);
1640
1641 if (next_loc.NotEmpty())
1642 {
1643 if (retval->IsMix())
1644 retval->SetMix().AddSeqLoc(*next_loc);
1645 else // equiv
1646 retval->SetEquiv().Add(*next_loc);
1647 }
1648
1649 if (!currentPt || currentPt->choice == GBPARSE_INT_RIGHT)
1650 break;
1651
1652 if (currentPt->choice == GBPARSE_INT_COMMA)
1653 {
1654 currentPt = currentPt->next;
1655 if (add_nulls)
1656 {
1657 CRef<objects::CSeq_loc> null_loc(new objects::CSeq_loc);
1658 null_loc->SetNull();
1659
1660 if (retval->IsMix())
1661 retval->SetMix().AddSeqLoc(*null_loc);
1662 else // equiv
1663 retval->SetEquiv().Add(*null_loc);
1664 }
1665 }
1666 else{
1667 xgbparse_error("Illegal token after interval",
1668 head_token, currentPt);
1669 keep_rawPt = true;
1670 ++num_errPt;
1671 goto FATAL;
1672 }
1673 }
1674 }
1675 }
1676 if (currentPt == NULL)
1677 {
1678 xgbparse_error("unexpected end of usable tokens",
1679 head_token, currentPt);
1680 keep_rawPt = true;
1681 ++num_errPt;
1682 goto FATAL;
1683 }
1684 else
1685 {
1686 if (currentPt->choice != GBPARSE_INT_RIGHT)
1687 {
1688 xgbparse_error("Missing \')\'" /* paran match ) */,
1689 head_token, currentPt);
1690 keep_rawPt = true;
1691 ++num_errPt;
1692 goto FATAL;
1693 }
1694 else
1695 {
1696 parenPt--;
1697 currentPt = currentPt->next;
1698 }
1699 }
1700 }
1701 }
1702 }
1703 }
1704 }
1705
1706 FATAL:
1707 if (num_errPt)
1708 {
1709 if (retval.NotEmpty())
1710 {
1711 retval->Reset();
1712 retval->SetWhole().Assign(*(*seq_ids.begin()));
1713 }
1714 }
1715
1716 return retval;
1717 }
1718
1719 /*-------- xgbreplace_ver() --------*/
1720
xgbreplace_ver(bool & keep_rawPt,int & parenPt,bool & sitesPt,ValNodePtr & currentPt,ValNodePtr head_token,int & num_errPt,const TSeqIdList & seq_ids,bool accver)1721 static CRef<objects::CSeq_loc> xgbreplace_ver(bool& keep_rawPt, int& parenPt,
1722 bool& sitesPt, ValNodePtr& currentPt,
1723 ValNodePtr head_token, int& num_errPt,
1724 const TSeqIdList& seq_ids, bool accver)
1725 {
1726 CRef<objects::CSeq_loc> ret;
1727
1728 keep_rawPt = true;
1729 currentPt = currentPt->next;
1730
1731 if (currentPt->choice == GBPARSE_INT_LEFT)
1732 {
1733 currentPt = currentPt->next;
1734 ret = xgbloc_ver(keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1735 num_errPt, seq_ids, accver);
1736 if (!currentPt)
1737 {
1738 xgbparse_error("unexpected end of interval tokens",
1739 head_token, currentPt);
1740 keep_rawPt = true;
1741 ++num_errPt;
1742 }
1743 else
1744 {
1745
1746 if (currentPt->choice != GBPARSE_INT_COMMA)
1747 {
1748 xgbparse_error("Missing comma after first location in replace",
1749 head_token, currentPt);
1750 ++num_errPt;
1751 }
1752 }
1753 }
1754 else
1755 {
1756 xgbparse_error("Missing \'(\'" /* paran match ) */
1757 , head_token, currentPt);
1758 ++num_errPt;
1759 }
1760
1761 return ret;
1762 }
1763
1764 /*---------- xgbparseint_ver()-----*/
1765
xgbparseint_ver(char * raw_intervals,bool & keep_rawPt,bool & sitesPt,int & num_errsPt,const TSeqIdList & seq_ids,bool accver)1766 CRef<objects::CSeq_loc> xgbparseint_ver(char* raw_intervals, bool& keep_rawPt, bool& sitesPt, int& num_errsPt,
1767 const TSeqIdList& seq_ids, bool accver)
1768 {
1769 CRef<objects::CSeq_loc> ret;
1770
1771 int paren_count = 0;
1772 bool go_again = false;
1773
1774 keep_rawPt = false;
1775 sitesPt = false;
1776
1777 ValNodePtr head_token = NULL,
1778 current_token = NULL;
1779
1780 num_errsPt = xgbparselex_ver(raw_intervals, &head_token, accver);
1781
1782 if (head_token == NULL)
1783 {
1784 num_errsPt = 1;
1785 return ret;
1786 }
1787
1788 if ( !num_errsPt)
1789 {
1790 current_token = head_token;
1791 xfind_one_of_num(head_token);
1792
1793 do
1794 {
1795 go_again = false;
1796 if (current_token)
1797 {
1798 switch (current_token->choice)
1799 {
1800 case GBPARSE_INT_JOIN:
1801 case GBPARSE_INT_ORDER:
1802 case GBPARSE_INT_GROUP:
1803 case GBPARSE_INT_ONE_OF:
1804 case GBPARSE_INT_COMPL:
1805 ret = xgbloc_ver(keep_rawPt, paren_count, sitesPt, current_token,
1806 head_token, num_errsPt, seq_ids, accver);
1807 /* need to check that out of tokens here */
1808 xgbparse_better_be_done(num_errsPt, current_token, head_token, keep_rawPt, paren_count);
1809 break;
1810
1811 case GBPARSE_INT_STRING:
1812 xgbparse_error("string in loc", head_token, current_token);
1813 keep_rawPt = true;
1814 ++num_errsPt;
1815 /* no break on purpose */
1816 case GBPARSE_INT_UNKNOWN:
1817 default:
1818 case GBPARSE_INT_RIGHT:
1819 case GBPARSE_INT_DOT_DOT:
1820 case GBPARSE_INT_COMMA:
1821 case GBPARSE_INT_SINGLE_DOT:
1822
1823 xgbparse_error("illegal initial token", head_token, current_token);
1824 keep_rawPt = true;
1825 ++num_errsPt;
1826 current_token = current_token->next;
1827 break;
1828
1829 case GBPARSE_INT_ACCESION:
1830 /*--- no warn, but strange ---*/
1831 /*-- no break on purpose ---*/
1832
1833 case GBPARSE_INT_CARET: case GBPARSE_INT_GT:
1834 case GBPARSE_INT_LT: case GBPARSE_INT_NUMBER:
1835 case GBPARSE_INT_LEFT:
1836
1837 case GBPARSE_INT_ONE_OF_NUM:
1838
1839 ret = xgbint_ver(keep_rawPt, current_token, head_token, num_errsPt, seq_ids, accver);
1840
1841 /* need to check that out of tokens here */
1842 xgbparse_better_be_done(num_errsPt, current_token, head_token, keep_rawPt, paren_count);
1843 break;
1844
1845 case GBPARSE_INT_REPLACE:
1846 ret = xgbreplace_ver(keep_rawPt, paren_count, sitesPt, current_token,
1847 head_token, num_errsPt, seq_ids, accver);
1848 keep_rawPt = true;
1849 /*---all errors handled within this function ---*/
1850 break;
1851 case GBPARSE_INT_SITES:
1852 sitesPt = true;
1853 go_again = true;
1854 current_token = current_token->next;
1855 break;
1856 }
1857 }
1858 } while (go_again && current_token);
1859 }
1860 else
1861 {
1862 keep_rawPt = true;
1863 }
1864
1865 if ( head_token)
1866 ValNodeFreeData(head_token);
1867
1868 if (num_errsPt)
1869 ret.Reset();
1870
1871 return ret;
1872 }
1873
1874 END_NCBI_SCOPE
1875